Path: blob/master/thirdparty/pcre2/src/pcre2_jit_compile.c
21790 views
/*************************************************1* Perl-Compatible Regular Expressions *2*************************************************/34/* PCRE is a library of functions to support regular expressions whose syntax5and semantics are as close as possible to those of the Perl 5 language.67Written by Philip Hazel8This module by Zoltan Herczeg9Original API code Copyright (c) 1997-2012 University of Cambridge10New API code Copyright (c) 2016-2024 University of Cambridge1112-----------------------------------------------------------------------------13Redistribution and use in source and binary forms, with or without14modification, are permitted provided that the following conditions are met:1516* Redistributions of source code must retain the above copyright notice,17this list of conditions and the following disclaimer.1819* Redistributions in binary form must reproduce the above copyright20notice, this list of conditions and the following disclaimer in the21documentation and/or other materials provided with the distribution.2223* Neither the name of the University of Cambridge nor the names of its24contributors may be used to endorse or promote products derived from25this software without specific prior written permission.2627THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE30ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE37POSSIBILITY OF SUCH DAMAGE.38-----------------------------------------------------------------------------39*/4041#if defined(__has_feature)42#if __has_feature(memory_sanitizer)43#include <sanitizer/msan_interface.h>44#endif /* __has_feature(memory_sanitizer) */45#endif /* defined(__has_feature) */4647#include "pcre2_internal.h"4849#ifdef SUPPORT_JIT5051/* All-in-one: Since we use the JIT compiler only from here,52we just include it. This way we don't need to touch the build53system files. */5455#define SLJIT_CONFIG_AUTO 156#define SLJIT_CONFIG_STATIC 157#define SLJIT_VERBOSE 05859#ifdef PCRE2_DEBUG60#define SLJIT_DEBUG 161#else62#define SLJIT_DEBUG 063#endif6465#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)66#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)6768static void * pcre2_jit_malloc(size_t size, void *allocator_data)69{70pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);71return allocator->malloc(size, allocator->memory_data);72}7374static void pcre2_jit_free(void *ptr, void *allocator_data)75{76pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);77allocator->free(ptr, allocator->memory_data);78}7980#include "../deps/sljit/sljit_src/sljitLir.c"8182#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED83#error Unsupported architecture84#endif8586/* Defines for debugging purposes. */8788/* 1 - Use unoptimized capturing brackets.892 - Enable capture_last_ptr (includes option 1). 
*/90/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */9192/* 1 - Always have a control head. */93/* #define DEBUG_FORCE_CONTROL_HEAD 1 */9495/* Allocate memory for the regex stack on the real machine stack.96Fast, but limited size. */97#define MACHINE_STACK_SIZE 327689899/* Growth rate for stack allocated by the OS. Should be the multiply100of page size. */101#define STACK_GROWTH_RATE 8192102103/* Enable to check that the allocation could destroy temporaries. */104#if defined SLJIT_DEBUG && SLJIT_DEBUG105#define DESTROY_REGISTERS 1106#endif107108/*109Short summary about the backtracking mechanism empolyed by the jit code generator:110111The code generator follows the recursive nature of the PERL compatible regular112expressions. The basic blocks of regular expressions are condition checkers113whose execute different commands depending on the result of the condition check.114The relationship between the operators can be horizontal (concatenation) and115vertical (sub-expression) (See struct backtrack_common for more details).116117'ab' - 'a' and 'b' regexps are concatenated118'a+' - 'a' is the sub-expression of the '+' operator119120The condition checkers are boolean (true/false) checkers. Machine code is generated121for the checker itself and for the actions depending on the result of the checker.122The 'true' case is called as the matching path (expected path), and the other is called as123the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken124branches on the matching path.125126Greedy star operator (*) :127Matching path: match happens.128Backtrack path: match failed.129Non-greedy star operator (*?) :130Matching path: no need to perform a match.131Backtrack path: match is required.132133The following example shows how the code generated for a capturing bracket134with two alternatives. 
Let A, B, C, D are arbirary regular expressions, and135we have the following regular expression:136137A(B|C)D138139The generated code will be the following:140141A matching path142'(' matching path (pushing arguments to the stack)143B matching path144')' matching path (pushing arguments to the stack)145D matching path146return with successful match147148D backtrack path149')' backtrack path (If we arrived from "C" jump to the backtrack of "C")150B backtrack path151C expected path152jump to D matching path153C backtrack path154A backtrack path155156Notice, that the order of backtrack code paths are the opposite of the fast157code paths. In this way the topmost value on the stack is always belong158to the current backtrack code path. The backtrack path must check159whether there is a next alternative. If so, it needs to jump back to160the matching path eventually. Otherwise it needs to clear out its own stack161frame and continue the execution on the backtrack code paths.162*/163164/*165Saved stack frames:166167Atomic blocks and asserts require reloading the values of private data168when the backtrack mechanism performed. Because of OP_RECURSE, the data169are not necessarly known in compile time, thus we need a dynamic restore170mechanism.171172The stack frames are stored in a chain list, and have the following format:173([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]174175Thus we can restore the private data to a particular point in the stack.176*/177178typedef struct jit_arguments {179/* Pointers first. */180struct sljit_stack *stack;181PCRE2_SPTR str;182PCRE2_SPTR begin;183PCRE2_SPTR end;184pcre2_match_data *match_data;185PCRE2_SPTR startchar_ptr;186PCRE2_UCHAR *mark_ptr;187int (*callout)(pcre2_callout_block *, void *);188void *callout_data;189/* Everything else after. 
*/190sljit_uw offset_limit;191sljit_u32 limit_match;192sljit_u32 oveccount;193sljit_u32 options;194} jit_arguments;195196#define JIT_NUMBER_OF_COMPILE_MODES 3197198typedef struct executable_functions {199void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];200void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];201sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];202sljit_u32 top_bracket;203sljit_u32 limit_match;204} executable_functions;205206typedef struct jump_list {207struct sljit_jump *jump;208struct jump_list *next;209} jump_list;210211typedef struct stub_list {212struct sljit_jump *start;213struct sljit_label *quit;214struct stub_list *next;215} stub_list;216217enum frame_types {218no_frame = -1,219no_stack = -2220};221222enum control_types {223type_mark = 0,224type_then_trap = 1225};226227enum early_fail_types {228type_skip = 0,229type_fail = 1,230type_fail_range = 2231};232233typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);234235/* The following structure is the key data type for the recursive236code generator. It is allocated by compile_matchingpath, and contains237the arguments for compile_backtrackingpath. Must be the first member238of its descendants. */239typedef struct backtrack_common {240/* Backtracking path of an opcode, which falls back241to our opcode, if it cannot resume matching. */242struct backtrack_common *prev;243/* Backtracks for opcodes without backtracking path.244These opcodes are between 'prev' and the current245opcode, and they never resume the match. */246jump_list *simple_backtracks;247/* Internal backtracking list for block constructs248which contains other opcodes, such as brackets,249asserts, conditionals, etc. */250struct backtrack_common *top;251/* Backtracks used internally by the opcode. For component252opcodes, this list is also used by those opcodes without253backtracking path which follows the 'top' backtrack. */254jump_list *own_backtracks;255/* Opcode pointer. 
*/256PCRE2_SPTR cc;257} backtrack_common;258259typedef struct assert_backtrack {260backtrack_common common;261jump_list *condfailed;262/* Less than 0 if a frame is not needed. */263int framesize;264/* Points to our private memory word on the stack. */265int private_data_ptr;266/* For iterators. */267struct sljit_label *matchingpath;268} assert_backtrack;269270typedef struct bracket_backtrack {271backtrack_common common;272/* Where to coninue if an alternative is successfully matched. */273struct sljit_label *alternative_matchingpath;274/* For rmin and rmax iterators. */275struct sljit_label *recursive_matchingpath;276/* For greedy ? operator. */277struct sljit_label *zero_matchingpath;278/* Contains the branches of a failed condition. */279union {280/* Both for OP_COND, OP_SCOND, OP_ASSERT_SCS. */281jump_list *no_capture;282assert_backtrack *assert;283/* For OP_ONCE. Less than 0 if not needed. */284int framesize;285} u;286/* For brackets with >3 alternatives. */287struct sljit_jump *matching_mov_addr;288/* Points to our private memory word on the stack. */289int private_data_ptr;290} bracket_backtrack;291292typedef struct bracketpos_backtrack {293backtrack_common common;294/* Points to our private memory word on the stack. */295int private_data_ptr;296/* Reverting stack is needed. */297int framesize;298/* Allocated stack size. */299int stacksize;300} bracketpos_backtrack;301302typedef struct braminzero_backtrack {303backtrack_common common;304struct sljit_label *matchingpath;305} braminzero_backtrack;306307typedef struct char_iterator_backtrack {308backtrack_common common;309/* Next iteration. */310struct sljit_label *matchingpath;311/* Creating a range based on the next character. */312struct {313unsigned int othercasebit;314PCRE2_UCHAR chr;315BOOL charpos_enabled;316} charpos;317} char_iterator_backtrack;318319typedef struct ref_iterator_backtrack {320backtrack_common common;321/* Next iteration. 
*/322struct sljit_label *matchingpath;323} ref_iterator_backtrack;324325typedef struct recurse_entry {326struct recurse_entry *next;327/* Contains the function entry label. */328struct sljit_label *entry_label;329/* Contains the function entry label. */330struct sljit_label *backtrack_label;331/* Collects the entry calls until the function is not created. */332jump_list *entry_calls;333/* Collects the backtrack calls until the function is not created. */334jump_list *backtrack_calls;335/* Points to the starting opcode. */336sljit_sw start;337/* Start of caller arguments. */338PCRE2_SPTR arg_start;339/* Size of caller arguments in bytes. */340sljit_uw arg_size;341} recurse_entry;342343typedef struct recurse_backtrack {344backtrack_common common;345/* Return to the matching path. */346struct sljit_label *matchingpath;347/* Recursive pattern. */348recurse_entry *entry;349/* Pattern is inlined. */350BOOL inlined_pattern;351} recurse_backtrack;352353typedef struct vreverse_backtrack {354backtrack_common common;355/* Return to the matching path. */356struct sljit_label *matchingpath;357} vreverse_backtrack;358359#define OP_THEN_TRAP OP_TABLE_LENGTH360361typedef struct then_trap_backtrack {362backtrack_common common;363/* If then_trap is not NULL, this structure contains the real364then_trap for the backtracking path. */365struct then_trap_backtrack *then_trap;366/* Points to the starting opcode. */367sljit_sw start;368/* Exit point for the then opcodes of this alternative. */369jump_list *quit;370/* Frame size of the current alternative. */371int framesize;372} then_trap_backtrack;373374#define MAX_N_CHARS 12375#define MAX_DIFF_CHARS 5376377typedef struct fast_forward_char_data {378/* Number of characters in the chars array, 255 for any character. */379sljit_u8 count;380/* Number of last UTF-8 characters in the chars array. */381sljit_u8 last_count;382/* Available characters in the current position. 
*/383PCRE2_UCHAR chars[MAX_DIFF_CHARS];384} fast_forward_char_data;385386#define MAX_CLASS_RANGE_SIZE 4387#define MAX_CLASS_CHARS_SIZE 3388389typedef struct compiler_common {390/* The sljit ceneric compiler. */391struct sljit_compiler *compiler;392/* Compiled regular expression. */393pcre2_real_code *re;394/* First byte code. */395PCRE2_SPTR start;396/* Maps private data offset to each opcode. */397sljit_s32 *private_data_ptrs;398/* Chain list of read-only data ptrs. */399void *read_only_data_head;400/* Bitset which tells which capture brackets can be optimized. */401sljit_u8 *optimized_cbrackets;402/* Bitset for tracking capture bracket status. */403sljit_u8 *cbracket_bitset;404/* Tells whether the starting offset is a target of then. */405sljit_u8 *then_offsets;406/* Current position where a THEN must jump. */407then_trap_backtrack *then_trap;408/* Starting offset of private data for capturing brackets. */409sljit_s32 cbra_ptr;410#if defined SLJIT_DEBUG && SLJIT_DEBUG411/* End offset of locals for assertions. */412sljit_s32 locals_size;413#endif414/* Output vector starting point. Must be divisible by 2. */415sljit_s32 ovector_start;416/* Points to the starting character of the current match. */417sljit_s32 start_ptr;418/* Last known position of the requested byte. */419sljit_s32 req_char_ptr;420/* Head of the last recursion. */421sljit_s32 recursive_head_ptr;422/* First inspected character for partial matching.423(Needed for avoiding zero length partial matches.) */424sljit_s32 start_used_ptr;425/* Starting pointer for partial soft matches. */426sljit_s32 hit_start;427/* Pointer of the match end position. */428sljit_s32 match_end_ptr;429/* Points to the marked string. */430sljit_s32 mark_ptr;431/* Head of the recursive control verb management chain.432Each item must have a previous offset and type433(see control_types) values. See do_search_mark. */434sljit_s32 control_head_ptr;435/* The offset of the saved STR_END in the outermost436scan substring block. 
Since scan substring restores437STR_END after a match, it is enough to restore438STR_END inside a scan substring block. */439sljit_s32 restore_end_ptr;440/* Points to the last matched capture block index. */441sljit_s32 capture_last_ptr;442/* Fast forward skipping byte code pointer. */443PCRE2_SPTR fast_forward_bc_ptr;444/* Locals used by fast fail optimization. */445sljit_s32 early_fail_start_ptr;446sljit_s32 early_fail_end_ptr;447/* Byte length of optimized_cbrackets and cbracket_bitset. */448sljit_u32 cbracket_bitset_length;449/* Variables used by recursive call generator. */450sljit_s32 recurse_bitset_size;451uint8_t *recurse_bitset;452453/* Flipped and lower case tables. */454const sljit_u8 *fcc;455sljit_sw lcc;456/* Mode can be PCRE2_JIT_COMPLETE and others. */457int mode;458/* TRUE, when empty match is accepted for partial matching. */459BOOL allow_empty_partial;460/* TRUE, when minlength is greater than 0. */461BOOL might_be_empty;462/* \K is found in the pattern. */463BOOL has_set_som;464/* (*SKIP:arg) is found in the pattern. */465BOOL has_skip_arg;466/* (*THEN) is found in the pattern. */467BOOL has_then;468/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */469BOOL has_skip_in_assert_back;470/* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */471BOOL local_quit_available;472/* Currently in a positive assertion. */473BOOL in_positive_assertion;474/* Newline control. */475int nltype;476sljit_u32 nlmax;477sljit_u32 nlmin;478int newline;479int bsr_nltype;480sljit_u32 bsr_nlmax;481sljit_u32 bsr_nlmin;482/* Dollar endonly. */483int endonly;484/* Tables. */485sljit_sw ctypes;486/* Named capturing brackets. */487PCRE2_SPTR name_table;488sljit_sw name_count;489sljit_sw name_entry_size;490491/* Labels and jump lists. 
*/492struct sljit_label *partialmatchlabel;493struct sljit_label *quit_label;494struct sljit_label *abort_label;495struct sljit_label *accept_label;496struct sljit_label *ff_newline_shortcut;497stub_list *stubs;498recurse_entry *entries;499recurse_entry *currententry;500jump_list *partialmatch;501jump_list *quit;502jump_list *positive_assertion_quit;503jump_list *abort;504jump_list *failed_match;505jump_list *accept;506jump_list *calllimit;507jump_list *stackalloc;508jump_list *revertframes;509jump_list *wordboundary;510jump_list *ucp_wordboundary;511jump_list *anynewline;512jump_list *hspace;513jump_list *vspace;514jump_list *casefulcmp;515jump_list *caselesscmp;516jump_list *reset_match;517/* Same as reset_match, but resets the STR_PTR as well. */518jump_list *restart_match;519BOOL unset_backref;520BOOL alt_circumflex;521#ifdef SUPPORT_UNICODE522BOOL utf;523BOOL invalid_utf;524BOOL ucp;525/* Points to saving area for iref. */526jump_list *getucd;527jump_list *getucdtype;528#if PCRE2_CODE_UNIT_WIDTH == 8529jump_list *utfreadchar;530jump_list *utfreadtype8;531jump_list *utfpeakcharback;532#endif533#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16534jump_list *utfreadchar_invalid;535jump_list *utfreadnewline_invalid;536jump_list *utfmoveback_invalid;537jump_list *utfpeakcharback_invalid;538#endif539#endif /* SUPPORT_UNICODE */540} compiler_common;541542/* For byte_sequence_compare. 
*/543544typedef struct compare_context {545int length;546int sourcereg;547#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED548int ucharptr;549union {550sljit_s32 asint;551sljit_u16 asushort;552#if PCRE2_CODE_UNIT_WIDTH == 8553sljit_u8 asbyte;554sljit_u8 asuchars[4];555#elif PCRE2_CODE_UNIT_WIDTH == 16556sljit_u16 asuchars[2];557#elif PCRE2_CODE_UNIT_WIDTH == 32558sljit_u32 asuchars[1];559#endif560} c;561union {562sljit_s32 asint;563sljit_u16 asushort;564#if PCRE2_CODE_UNIT_WIDTH == 8565sljit_u8 asbyte;566sljit_u8 asuchars[4];567#elif PCRE2_CODE_UNIT_WIDTH == 16568sljit_u16 asuchars[2];569#elif PCRE2_CODE_UNIT_WIDTH == 32570sljit_u32 asuchars[1];571#endif572} oc;573#endif574} compare_context;575576/* Undefine sljit macros. */577#undef CMP578579/* Used for accessing the elements of the stack. */580#define STACK(i) ((i) * SSIZE_OF(sw))581582#ifdef SLJIT_PREF_SHIFT_REG583#if SLJIT_PREF_SHIFT_REG == SLJIT_R2584/* Nothing. */585#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3586#define SHIFT_REG_IS_R3587#else588#error "Unsupported shift register"589#endif590#endif591592#define TMP1 SLJIT_R0593#ifdef SHIFT_REG_IS_R3594#define TMP2 SLJIT_R3595#define TMP3 SLJIT_R2596#else597#define TMP2 SLJIT_R2598#define TMP3 SLJIT_R3599#endif600#define STR_PTR SLJIT_R1601#define STR_END SLJIT_S0602#define STACK_TOP SLJIT_S1603#define STACK_LIMIT SLJIT_S2604#define COUNT_MATCH SLJIT_S3605#define ARGUMENTS SLJIT_S4606#define RETURN_ADDR SLJIT_R4607608#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)609#define HAS_VIRTUAL_REGISTERS 1610#else611#define HAS_VIRTUAL_REGISTERS 0612#endif613614/* Local space layout. */615/* Max limit of recursions. */616#define LIMIT_MATCH (0 * sizeof(sljit_sw))617/* Local variables. Their number is computed by check_opcode_types. 
*/618#define LOCAL0 (1 * sizeof(sljit_sw))619#define LOCAL1 (2 * sizeof(sljit_sw))620#define LOCAL2 (3 * sizeof(sljit_sw))621#define LOCAL3 (4 * sizeof(sljit_sw))622#define LOCAL4 (5 * sizeof(sljit_sw))623/* The output vector is stored on the stack, and contains pointers624to characters. The vector data is divided into two groups: the first625group contains the start / end character pointers, and the second is626the start pointers when the end of the capturing group has not yet reached. */627#define OVECTOR_START (common->ovector_start)628#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))629#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))630#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])631632#if PCRE2_CODE_UNIT_WIDTH == 8633#define MOV_UCHAR SLJIT_MOV_U8634#define IN_UCHARS(x) (x)635#elif PCRE2_CODE_UNIT_WIDTH == 16636#define MOV_UCHAR SLJIT_MOV_U16637#define UCHAR_SHIFT (1)638#define IN_UCHARS(x) ((x) * 2)639#elif PCRE2_CODE_UNIT_WIDTH == 32640#define MOV_UCHAR SLJIT_MOV_U32641#define UCHAR_SHIFT (2)642#define IN_UCHARS(x) ((x) * 4)643#else644#error Unsupported compiling mode645#endif646647/* Shortcuts. 
*/648#define DEFINE_COMPILER \649struct sljit_compiler *compiler = common->compiler650#define OP1(op, dst, dstw, src, srcw) \651sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))652#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \653sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))654#define OP2U(op, src1, src1w, src2, src2w) \655sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))656#define OP_SRC(op, src, srcw) \657sljit_emit_op_src(compiler, (op), (src), (srcw))658#define LABEL() \659sljit_emit_label(compiler)660#define JUMP(type) \661sljit_emit_jump(compiler, (type))662#define JUMPTO(type, label) \663sljit_set_label(sljit_emit_jump(compiler, (type)), (label))664#define JUMPHERE(jump) \665sljit_set_label((jump), sljit_emit_label(compiler))666#define SET_LABEL(jump, label) \667sljit_set_label((jump), (label))668#define CMP(type, src1, src1w, src2, src2w) \669sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))670#define CMPTO(type, src1, src1w, src2, src2w, label) \671sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))672#define OP_FLAGS(op, dst, dstw, type) \673sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))674#define SELECT(type, dst_reg, src1, src1w, src2_reg) \675sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))676#define GET_LOCAL_BASE(dst, dstw, offset) \677sljit_get_local_base(compiler, (dst), (dstw), (offset))678679#define READ_CHAR_MAX ((sljit_u32)0xffffffff)680681#define INVALID_UTF_CHAR -1682#define UNASSIGNED_UTF_CHAR 888683684#if defined SUPPORT_UNICODE685#if PCRE2_CODE_UNIT_WIDTH == 8686687#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \688{ \689if (ptr[0] <= 0x7f) \690c = *ptr++; \691else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \692{ \693c = ptr[1] - 0x80; \694\695if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \696{ \697c |= (ptr[0] - 0xc0) << 6; \698ptr += 2; \699} \700else if (ptr + 2 
< end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \701{ \702c = c << 6 | (ptr[2] - 0x80); \703\704if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \705{ \706c |= (ptr[0] - 0xe0) << 12; \707ptr += 3; \708\709if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \710{ \711invalid_action; \712} \713} \714else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \715{ \716c = c << 6 | (ptr[3] - 0x80); \717\718if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \719{ \720c |= (ptr[0] - 0xf0) << 18; \721ptr += 4; \722\723if (c >= 0x110000 || c < 0x10000) \724{ \725invalid_action; \726} \727} \728else \729{ \730invalid_action; \731} \732} \733else \734{ \735invalid_action; \736} \737} \738else \739{ \740invalid_action; \741} \742} \743else \744{ \745invalid_action; \746} \747}748749#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \750{ \751c = ptr[-1]; \752if (c <= 0x7f) \753ptr--; \754else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \755{ \756c -= 0x80; \757\758if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \759{ \760c |= (ptr[-2] - 0xc0) << 6; \761ptr -= 2; \762} \763else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \764{ \765c = c << 6 | (ptr[-2] - 0x80); \766\767if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \768{ \769c |= (ptr[-3] - 0xe0) << 12; \770ptr -= 3; \771\772if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \773{ \774invalid_action; \775} \776} \777else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \778{ \779c = c << 6 | (ptr[-3] - 0x80); \780\781if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \782{ \783c |= (ptr[-4] - 0xf0) << 18; \784ptr -= 4; \785\786if (c >= 0x110000 || c < 0x10000) \787{ \788invalid_action; \789} \790} \791else \792{ \793invalid_action; \794} \795} \796else \797{ \798invalid_action; \799} \800} \801else \802{ \803invalid_action; \804} \805} \806else \807{ \808invalid_action; \809} \810}811812#elif PCRE2_CODE_UNIT_WIDTH == 16813814#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \815{ \816if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \817c = *ptr++; 
\818else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \819{ \820c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \821ptr += 2; \822} \823else \824{ \825invalid_action; \826} \827}828829#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \830{ \831c = ptr[-1]; \832if (c < 0xd800 || c >= 0xe000) \833ptr--; \834else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \835{ \836c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \837ptr -= 2; \838} \839else \840{ \841invalid_action; \842} \843}844845846#elif PCRE2_CODE_UNIT_WIDTH == 32847848#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \849{ \850if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \851c = *ptr++; \852else \853{ \854invalid_action; \855} \856}857858#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \859{ \860c = ptr[-1]; \861if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \862ptr--; \863else \864{ \865invalid_action; \866} \867}868869#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */870#endif /* SUPPORT_UNICODE */871872static PCRE2_SPTR bracketend(PCRE2_SPTR cc)873{874SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));875do cc += GET(cc, 1); while (*cc == OP_ALT);876SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);877cc += 1 + LINK_SIZE;878return cc;879}880881static int no_alternatives(PCRE2_SPTR cc)882{883int count = 0;884SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERT_SCS) || (*cc >= OP_ONCE && *cc <= OP_SCOND));885do886{887cc += GET(cc, 1);888count++;889}890while (*cc == OP_ALT);891SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);892return count;893}894895static BOOL find_vreverse(PCRE2_SPTR cc)896{897SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);898899do900{901if (cc[1 + LINK_SIZE] == OP_VREVERSE)902return TRUE;903cc += GET(cc, 1);904}905while (*cc == OP_ALT);906907return 
FALSE;908}909910/* Functions whose might need modification for all new supported opcodes:911next_opcode912check_opcode_types913set_private_data_ptrs914get_framesize915init_frame916get_recurse_data_length917copy_recurse_data918compile_matchingpath919compile_backtrackingpath920*/921922static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)923{924SLJIT_UNUSED_ARG(common);925switch(*cc)926{927case OP_SOD:928case OP_SOM:929case OP_SET_SOM:930case OP_NOT_WORD_BOUNDARY:931case OP_WORD_BOUNDARY:932case OP_NOT_DIGIT:933case OP_DIGIT:934case OP_NOT_WHITESPACE:935case OP_WHITESPACE:936case OP_NOT_WORDCHAR:937case OP_WORDCHAR:938case OP_ANY:939case OP_ALLANY:940case OP_NOTPROP:941case OP_PROP:942case OP_ANYNL:943case OP_NOT_HSPACE:944case OP_HSPACE:945case OP_NOT_VSPACE:946case OP_VSPACE:947case OP_EXTUNI:948case OP_EODN:949case OP_EOD:950case OP_CIRC:951case OP_CIRCM:952case OP_DOLL:953case OP_DOLLM:954case OP_CRSTAR:955case OP_CRMINSTAR:956case OP_CRPLUS:957case OP_CRMINPLUS:958case OP_CRQUERY:959case OP_CRMINQUERY:960case OP_CRRANGE:961case OP_CRMINRANGE:962case OP_CRPOSSTAR:963case OP_CRPOSPLUS:964case OP_CRPOSQUERY:965case OP_CRPOSRANGE:966case OP_CLASS:967case OP_NCLASS:968case OP_REF:969case OP_REFI:970case OP_DNREF:971case OP_DNREFI:972case OP_RECURSE:973case OP_CALLOUT:974case OP_ALT:975case OP_KET:976case OP_KETRMAX:977case OP_KETRMIN:978case OP_KETRPOS:979case OP_REVERSE:980case OP_VREVERSE:981case OP_ASSERT:982case OP_ASSERT_NOT:983case OP_ASSERTBACK:984case OP_ASSERTBACK_NOT:985case OP_ASSERT_NA:986case OP_ASSERTBACK_NA:987case OP_ASSERT_SCS:988case OP_ONCE:989case OP_SCRIPT_RUN:990case OP_BRA:991case OP_BRAPOS:992case OP_CBRA:993case OP_CBRAPOS:994case OP_COND:995case OP_SBRA:996case OP_SBRAPOS:997case OP_SCBRA:998case OP_SCBRAPOS:999case OP_SCOND:1000case OP_CREF:1001case OP_DNCREF:1002case OP_RREF:1003case OP_DNRREF:1004case OP_FALSE:1005case OP_TRUE:1006case OP_BRAZERO:1007case OP_BRAMINZERO:1008case OP_BRAPOSZERO:1009case OP_PRUNE:1010case 
OP_SKIP:1011case OP_THEN:1012case OP_COMMIT:1013case OP_FAIL:1014case OP_ACCEPT:1015case OP_ASSERT_ACCEPT:1016case OP_CLOSE:1017case OP_SKIPZERO:1018case OP_NOT_UCP_WORD_BOUNDARY:1019case OP_UCP_WORD_BOUNDARY:1020return cc + PRIV(OP_lengths)[*cc];10211022case OP_CHAR:1023case OP_CHARI:1024case OP_NOT:1025case OP_NOTI:1026case OP_STAR:1027case OP_MINSTAR:1028case OP_PLUS:1029case OP_MINPLUS:1030case OP_QUERY:1031case OP_MINQUERY:1032case OP_UPTO:1033case OP_MINUPTO:1034case OP_EXACT:1035case OP_POSSTAR:1036case OP_POSPLUS:1037case OP_POSQUERY:1038case OP_POSUPTO:1039case OP_STARI:1040case OP_MINSTARI:1041case OP_PLUSI:1042case OP_MINPLUSI:1043case OP_QUERYI:1044case OP_MINQUERYI:1045case OP_UPTOI:1046case OP_MINUPTOI:1047case OP_EXACTI:1048case OP_POSSTARI:1049case OP_POSPLUSI:1050case OP_POSQUERYI:1051case OP_POSUPTOI:1052case OP_NOTSTAR:1053case OP_NOTMINSTAR:1054case OP_NOTPLUS:1055case OP_NOTMINPLUS:1056case OP_NOTQUERY:1057case OP_NOTMINQUERY:1058case OP_NOTUPTO:1059case OP_NOTMINUPTO:1060case OP_NOTEXACT:1061case OP_NOTPOSSTAR:1062case OP_NOTPOSPLUS:1063case OP_NOTPOSQUERY:1064case OP_NOTPOSUPTO:1065case OP_NOTSTARI:1066case OP_NOTMINSTARI:1067case OP_NOTPLUSI:1068case OP_NOTMINPLUSI:1069case OP_NOTQUERYI:1070case OP_NOTMINQUERYI:1071case OP_NOTUPTOI:1072case OP_NOTMINUPTOI:1073case OP_NOTEXACTI:1074case OP_NOTPOSSTARI:1075case OP_NOTPOSPLUSI:1076case OP_NOTPOSQUERYI:1077case OP_NOTPOSUPTOI:1078cc += PRIV(OP_lengths)[*cc];1079#ifdef SUPPORT_UNICODE1080if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1081#endif1082return cc;10831084/* Special cases. 
*/1085case OP_TYPESTAR:1086case OP_TYPEMINSTAR:1087case OP_TYPEPLUS:1088case OP_TYPEMINPLUS:1089case OP_TYPEQUERY:1090case OP_TYPEMINQUERY:1091case OP_TYPEUPTO:1092case OP_TYPEMINUPTO:1093case OP_TYPEEXACT:1094case OP_TYPEPOSSTAR:1095case OP_TYPEPOSPLUS:1096case OP_TYPEPOSQUERY:1097case OP_TYPEPOSUPTO:1098return cc + PRIV(OP_lengths)[*cc] - 1;10991100case OP_ANYBYTE:1101#ifdef SUPPORT_UNICODE1102if (common->utf) return NULL;1103#endif1104return cc + 1;11051106case OP_CALLOUT_STR:1107return cc + GET(cc, 1 + 2*LINK_SIZE);11081109#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 81110case OP_ECLASS:1111case OP_XCLASS:1112SLJIT_COMPILE_ASSERT(OP_XCLASS + 1 == OP_ECLASS && OP_CLASS + 1 == OP_NCLASS && OP_NCLASS < OP_XCLASS, class_byte_code_order);1113return cc + GET(cc, 1);1114#endif11151116case OP_MARK:1117case OP_COMMIT_ARG:1118case OP_PRUNE_ARG:1119case OP_SKIP_ARG:1120case OP_THEN_ARG:1121return cc + 1 + 2 + cc[1];11221123default:1124SLJIT_UNREACHABLE();1125return NULL;1126}1127}11281129static sljit_s32 ref_update_local_size(compiler_common *common, PCRE2_SPTR cc, sljit_s32 current_locals_size)1130{1131/* Depends on do_casefulcmp(), do_caselesscmp(), and compile_ref_matchingpath() */1132int locals_size = 2 * SSIZE_OF(sw);1133SLJIT_UNUSED_ARG(common);11341135#ifdef SUPPORT_UNICODE1136if ((*cc == OP_REFI || *cc == OP_DNREFI) && (common->utf || common->ucp))1137locals_size = 3 * SSIZE_OF(sw);1138#endif11391140cc += PRIV(OP_lengths)[*cc];1141/* Although do_casefulcmp() uses only one local, the allocate_stack()1142calls during the repeat destroys LOCAL1 variables. */1143if (*cc >= OP_CRSTAR && *cc <= OP_CRPOSRANGE)1144locals_size += 2 * SSIZE_OF(sw);11451146return (current_locals_size >= locals_size) ? 
current_locals_size : locals_size;1147}11481149static SLJIT_INLINE BOOL is_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)1150{1151sljit_u8 bit = (sljit_u8)(1 << (capture_index & 0x7));1152return (common->optimized_cbrackets[capture_index >> 3] & bit) != 0;1153}11541155static SLJIT_INLINE void clear_optimized_cbracket(compiler_common *common, sljit_s32 capture_index)1156{1157sljit_u8 mask = (sljit_u8)~(1 << (capture_index & 0x7));1158common->optimized_cbrackets[capture_index >> 3] &= mask;1159}11601161static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)1162{1163int count;1164PCRE2_SPTR slot;1165PCRE2_SPTR assert_back_end = cc - 1;1166PCRE2_SPTR assert_na_end = cc - 1;1167sljit_s32 locals_size = 2 * SSIZE_OF(sw);1168BOOL set_recursive_head = FALSE;1169BOOL set_capture_last = FALSE;1170BOOL set_mark = FALSE;11711172/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */1173while (cc < ccend)1174{1175switch(*cc)1176{1177case OP_SET_SOM:1178common->has_set_som = TRUE;1179common->might_be_empty = TRUE;1180cc += 1;1181break;11821183case OP_TYPEUPTO:1184case OP_TYPEEXACT:1185if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1186locals_size = 3 * SSIZE_OF(sw);1187cc += (2 + IMM2_SIZE) - 1;1188break;11891190case OP_TYPEPOSSTAR:1191case OP_TYPEPOSPLUS:1192case OP_TYPEPOSQUERY:1193if (cc[1] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1194locals_size = 3 * SSIZE_OF(sw);1195cc += 2 - 1;1196break;11971198case OP_TYPEPOSUPTO:1199#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 321200if (common->utf && locals_size <= 3 * SSIZE_OF(sw))1201locals_size = 3 * SSIZE_OF(sw);1202#endif1203if (cc[1 + IMM2_SIZE] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF(sw))1204locals_size = 3 * SSIZE_OF(sw);1205cc += (2 + IMM2_SIZE) - 1;1206break;12071208case OP_REFI:1209case OP_REF:1210locals_size = ref_update_local_size(common, cc, 
locals_size);1211clear_optimized_cbracket(common, GET2(cc, 1));1212cc += PRIV(OP_lengths)[*cc];1213break;12141215case OP_ASSERT_NA:1216case OP_ASSERTBACK_NA:1217case OP_ASSERT_SCS:1218slot = bracketend(cc);1219if (slot > assert_na_end)1220assert_na_end = slot;1221cc += 1 + LINK_SIZE;1222break;12231224case OP_CBRAPOS:1225case OP_SCBRAPOS:1226clear_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE));1227cc += 1 + LINK_SIZE + IMM2_SIZE;1228break;12291230case OP_COND:1231case OP_SCOND:1232/* Only AUTO_CALLOUT can insert this opcode. We do1233not intend to support this case. */1234if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)1235return FALSE;1236cc += 1 + LINK_SIZE;1237break;12381239case OP_CREF:1240clear_optimized_cbracket(common, GET2(cc, 1));1241cc += 1 + IMM2_SIZE;1242break;12431244case OP_DNREFI:1245case OP_DNREF:1246locals_size = ref_update_local_size(common, cc, locals_size);1247PCRE2_FALLTHROUGH /* Fall through */1248case OP_DNCREF:1249count = GET2(cc, 1 + IMM2_SIZE);1250slot = common->name_table + GET2(cc, 1) * common->name_entry_size;1251while (count-- > 0)1252{1253clear_optimized_cbracket(common, GET2(slot, 0));1254slot += common->name_entry_size;1255}1256cc += PRIV(OP_lengths)[*cc];1257break;12581259case OP_RECURSE:1260/* Set its value only once. */1261set_recursive_head = TRUE;1262cc += 1 + LINK_SIZE;1263while (*cc == OP_CREF)1264{1265clear_optimized_cbracket(common, GET2(cc, 1));1266cc += 1 + IMM2_SIZE;1267}1268break;12691270case OP_CALLOUT:1271case OP_CALLOUT_STR:1272set_capture_last = TRUE;1273cc += (*cc == OP_CALLOUT) ? 
PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);1274break;12751276case OP_ASSERTBACK:1277slot = bracketend(cc);1278if (slot > assert_back_end)1279assert_back_end = slot;1280cc += 1 + LINK_SIZE;1281break;12821283case OP_THEN_ARG:1284common->has_then = TRUE;1285common->control_head_ptr = 1;1286PCRE2_FALLTHROUGH /* Fall through */12871288case OP_COMMIT_ARG:1289case OP_PRUNE_ARG:1290case OP_MARK:1291set_mark = TRUE;1292cc += 1 + 2 + cc[1];1293break;12941295case OP_THEN:1296common->has_then = TRUE;1297common->control_head_ptr = 1;1298cc += 1;1299break;13001301case OP_SKIP:1302if (cc < assert_back_end)1303common->has_skip_in_assert_back = TRUE;1304cc += 1;1305break;13061307case OP_SKIP_ARG:1308common->control_head_ptr = 1;1309common->has_skip_arg = TRUE;1310if (cc < assert_back_end)1311common->has_skip_in_assert_back = TRUE;1312cc += 1 + 2 + cc[1];1313break;13141315case OP_ASSERT_ACCEPT:1316if (cc < assert_na_end)1317return FALSE;1318cc++;1319break;13201321#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 321322case OP_CRPOSRANGE:1323/* The second value can be 0 for infinite repeats. 
*/1324if (common->utf && GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE) && locals_size <= 3 * SSIZE_OF(sw))1325locals_size = 3 * SSIZE_OF(sw);1326cc += 1 + 2 * IMM2_SIZE;1327break;13281329case OP_POSUPTO:1330case OP_POSUPTOI:1331case OP_NOTPOSUPTO:1332case OP_NOTPOSUPTOI:1333if (common->utf && locals_size <= 3 * SSIZE_OF(sw))1334locals_size = 3 * SSIZE_OF(sw);1335#endif1336PCRE2_FALLTHROUGH /* Fall through */1337default:1338cc = next_opcode(common, cc);1339if (cc == NULL)1340return FALSE;1341break;1342}1343}13441345SLJIT_ASSERT((locals_size & (SSIZE_OF(sw) - 1)) == 0);1346#if defined SLJIT_DEBUG && SLJIT_DEBUG1347common->locals_size = locals_size;1348#endif13491350if (locals_size > 0)1351common->ovector_start += locals_size;13521353if (set_mark)1354{1355SLJIT_ASSERT(common->mark_ptr == 0);1356common->mark_ptr = common->ovector_start;1357common->ovector_start += sizeof(sljit_sw);1358}13591360if (set_recursive_head)1361{1362SLJIT_ASSERT(common->recursive_head_ptr == 0);1363common->recursive_head_ptr = common->ovector_start;1364common->ovector_start += sizeof(sljit_sw);1365}13661367if (set_capture_last)1368{1369SLJIT_ASSERT(common->capture_last_ptr == 0);1370common->capture_last_ptr = common->ovector_start;1371common->ovector_start += sizeof(sljit_sw);1372}13731374return TRUE;1375}13761377#define EARLY_FAIL_ENHANCE_MAX (3 + 3)13781379/*1380Start represent the number of allowed early fail enhancements13811382The 0-2 values has a special meaning:13830 - skip is allowed for all iterators13841 - fail is allowed for all iterators13852 - fail is allowed for greedy iterators13863 - only ranged early fail is allowed1387>3 - (start - 3) number of remaining ranged early fails allowed13881389return: the updated value of start1390*/1391static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,1392int *private_data_start, sljit_s32 depth, int start)1393{1394PCRE2_SPTR begin = cc;1395PCRE2_SPTR next_alt;1396PCRE2_SPTR end;1397PCRE2_SPTR accelerated_start;1398int result = 
0;1399int count, prev_count;14001401SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);1402SLJIT_ASSERT(*cc != OP_CBRA || is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE)));1403SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);14041405next_alt = cc + GET(cc, 1);1406if (*next_alt == OP_ALT && start < 1)1407start = 1;14081409do1410{1411count = start;1412cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);14131414while (TRUE)1415{1416accelerated_start = NULL;14171418switch(*cc)1419{1420case OP_SOD:1421case OP_SOM:1422case OP_SET_SOM:1423case OP_NOT_WORD_BOUNDARY:1424case OP_WORD_BOUNDARY:1425case OP_EODN:1426case OP_EOD:1427case OP_CIRC:1428case OP_CIRCM:1429case OP_DOLL:1430case OP_DOLLM:1431case OP_NOT_UCP_WORD_BOUNDARY:1432case OP_UCP_WORD_BOUNDARY:1433/* Zero width assertions. */1434cc++;1435continue;14361437case OP_NOT_DIGIT:1438case OP_DIGIT:1439case OP_NOT_WHITESPACE:1440case OP_WHITESPACE:1441case OP_NOT_WORDCHAR:1442case OP_WORDCHAR:1443case OP_ANY:1444case OP_ALLANY:1445case OP_ANYBYTE:1446case OP_NOT_HSPACE:1447case OP_HSPACE:1448case OP_NOT_VSPACE:1449case OP_VSPACE:1450if (count < 1)1451count = 1;1452cc++;1453continue;14541455case OP_ANYNL:1456case OP_EXTUNI:1457if (count < 3)1458count = 3;1459cc++;1460continue;14611462case OP_NOTPROP:1463case OP_PROP:1464if (count < 1)1465count = 1;1466cc += 1 + 2;1467continue;14681469case OP_CHAR:1470case OP_CHARI:1471case OP_NOT:1472case OP_NOTI:1473if (count < 1)1474count = 1;1475cc += 2;1476#ifdef SUPPORT_UNICODE1477if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1478#endif1479continue;14801481case OP_TYPEMINSTAR:1482case OP_TYPEMINPLUS:1483if (count == 2)1484count = 3;1485PCRE2_FALLTHROUGH /* Fall through */14861487case OP_TYPESTAR:1488case OP_TYPEPLUS:1489case OP_TYPEPOSSTAR:1490case OP_TYPEPOSPLUS:1491/* The type or prop opcode is skipped in the next iteration. 
*/1492cc += 1;14931494if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)1495{1496accelerated_start = cc - 1;1497break;1498}14991500if (count < 3)1501count = 3;1502continue;15031504case OP_TYPEEXACT:1505if (count < 1)1506count = 1;1507cc += 1 + IMM2_SIZE;1508continue;15091510case OP_TYPEUPTO:1511case OP_TYPEMINUPTO:1512case OP_TYPEPOSUPTO:1513cc += IMM2_SIZE;1514PCRE2_FALLTHROUGH /* Fall through */15151516case OP_TYPEQUERY:1517case OP_TYPEMINQUERY:1518case OP_TYPEPOSQUERY:1519/* The type or prop opcode is skipped in the next iteration. */1520if (count < 3)1521count = 3;1522cc += 1;1523continue;15241525case OP_MINSTAR:1526case OP_MINPLUS:1527case OP_MINSTARI:1528case OP_MINPLUSI:1529case OP_NOTMINSTAR:1530case OP_NOTMINPLUS:1531case OP_NOTMINSTARI:1532case OP_NOTMINPLUSI:1533if (count == 2)1534count = 3;1535PCRE2_FALLTHROUGH /* Fall through */15361537case OP_STAR:1538case OP_PLUS:1539case OP_POSSTAR:1540case OP_POSPLUS:15411542case OP_STARI:1543case OP_PLUSI:1544case OP_POSSTARI:1545case OP_POSPLUSI:15461547case OP_NOTSTAR:1548case OP_NOTPLUS:1549case OP_NOTPOSSTAR:1550case OP_NOTPOSPLUS:15511552case OP_NOTSTARI:1553case OP_NOTPLUSI:1554case OP_NOTPOSSTARI:1555case OP_NOTPOSPLUSI:1556accelerated_start = cc;1557cc += 2;1558#ifdef SUPPORT_UNICODE1559if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1560#endif1561break;15621563case OP_EXACT:1564if (count < 1)1565count = 1;1566cc += 2 + IMM2_SIZE;1567#ifdef SUPPORT_UNICODE1568if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1569#endif1570continue;15711572case OP_UPTO:1573case OP_MINUPTO:1574case OP_POSUPTO:1575case OP_UPTOI:1576case OP_MINUPTOI:1577case OP_EXACTI:1578case OP_POSUPTOI:1579case OP_NOTUPTO:1580case OP_NOTMINUPTO:1581case OP_NOTEXACT:1582case OP_NOTPOSUPTO:1583case OP_NOTUPTOI:1584case OP_NOTMINUPTOI:1585case OP_NOTEXACTI:1586case OP_NOTPOSUPTOI:1587cc += IMM2_SIZE;1588PCRE2_FALLTHROUGH /* Fall through */15891590case OP_QUERY:1591case OP_MINQUERY:1592case OP_POSQUERY:1593case 
OP_QUERYI:1594case OP_MINQUERYI:1595case OP_POSQUERYI:1596case OP_NOTQUERY:1597case OP_NOTMINQUERY:1598case OP_NOTPOSQUERY:1599case OP_NOTQUERYI:1600case OP_NOTMINQUERYI:1601case OP_NOTPOSQUERYI:1602if (count < 3)1603count = 3;1604cc += 2;1605#ifdef SUPPORT_UNICODE1606if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);1607#endif1608continue;16091610case OP_CLASS:1611case OP_NCLASS:1612#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 81613case OP_XCLASS:1614case OP_ECLASS:1615accelerated_start = cc;1616cc += (*cc >= OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)));1617#else1618accelerated_start = cc;1619cc += (1 + (32 / sizeof(PCRE2_UCHAR)));1620#endif16211622switch (*cc)1623{1624case OP_CRMINSTAR:1625case OP_CRMINPLUS:1626if (count == 2)1627count = 3;1628PCRE2_FALLTHROUGH /* Fall through */16291630case OP_CRSTAR:1631case OP_CRPLUS:1632case OP_CRPOSSTAR:1633case OP_CRPOSPLUS:1634cc++;1635break;16361637case OP_CRRANGE:1638case OP_CRMINRANGE:1639case OP_CRPOSRANGE:1640if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))1641{1642/* Exact repeat. */1643cc += 1 + 2 * IMM2_SIZE;1644if (count < 1)1645count = 1;1646continue;1647}16481649cc += 2 * IMM2_SIZE;1650PCRE2_FALLTHROUGH /* Fall through */1651case OP_CRQUERY:1652case OP_CRMINQUERY:1653case OP_CRPOSQUERY:1654cc++;1655if (count < 3)1656count = 3;1657continue;16581659default:1660/* No repeat. 
*/1661if (count < 1)1662count = 1;1663continue;1664}1665break;16661667case OP_BRA:1668case OP_CBRA:1669prev_count = count;1670if (count < 1)1671count = 1;16721673if (depth >= 4)1674break;16751676if (count < 3 && cc[GET(cc, 1)] == OP_ALT)1677count = 3;16781679end = bracketend(cc);1680if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && !is_optimized_cbracket(common, GET2(cc, 1 + LINK_SIZE))))1681break;16821683prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);16841685if (prev_count > count)1686count = prev_count;16871688if (PRIVATE_DATA(cc) != 0)1689common->private_data_ptrs[begin - common->start] = 1;16901691if (count < EARLY_FAIL_ENHANCE_MAX)1692{1693cc = end;1694continue;1695}1696break;16971698case OP_KET:1699SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);1700if (cc >= next_alt)1701break;1702cc += 1 + LINK_SIZE;1703continue;1704}17051706if (accelerated_start == NULL)1707break;17081709if (count == 0)1710{1711common->fast_forward_bc_ptr = accelerated_start;1712common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;1713*private_data_start += sizeof(sljit_sw);1714count = 4;1715}1716else if (count < 3)1717{1718common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;17191720if (common->early_fail_start_ptr == 0)1721common->early_fail_start_ptr = *private_data_start;17221723*private_data_start += sizeof(sljit_sw);1724common->early_fail_end_ptr = *private_data_start;17251726if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)1727return EARLY_FAIL_ENHANCE_MAX;17281729count = 4;1730}1731else1732{1733common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;17341735if (common->early_fail_start_ptr == 0)1736common->early_fail_start_ptr = *private_data_start;17371738*private_data_start += 2 * sizeof(sljit_sw);1739common->early_fail_end_ptr = *private_data_start;17401741if 
(*private_data_start > SLJIT_MAX_LOCAL_SIZE)1742return EARLY_FAIL_ENHANCE_MAX;17431744count++;1745}17461747/* Cannot be part of a repeat. */1748common->private_data_ptrs[begin - common->start] = 1;17491750if (count >= EARLY_FAIL_ENHANCE_MAX)1751break;1752}17531754if (*cc != OP_ALT && *cc != OP_KET)1755result = EARLY_FAIL_ENHANCE_MAX;1756else if (result < count)1757result = count;17581759cc = next_alt;1760next_alt = cc + GET(cc, 1);1761}1762while (*cc == OP_ALT);17631764return result;1765}17661767static int get_class_iterator_size(PCRE2_SPTR cc)1768{1769sljit_u32 min;1770sljit_u32 max;1771switch(*cc)1772{1773case OP_CRSTAR:1774case OP_CRPLUS:1775return 2;17761777case OP_CRMINSTAR:1778case OP_CRMINPLUS:1779case OP_CRQUERY:1780case OP_CRMINQUERY:1781return 1;17821783case OP_CRRANGE:1784case OP_CRMINRANGE:1785min = GET2(cc, 1);1786max = GET2(cc, 1 + IMM2_SIZE);1787if (max == 0)1788return (*cc == OP_CRRANGE) ? 2 : 1;1789max -= min;1790if (max > (sljit_u32)(*cc == OP_CRRANGE ? 0 : 1))1791max = 2;1792return max;17931794default:1795return 0;1796}1797}17981799static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)1800{1801PCRE2_SPTR end = bracketend(begin);1802PCRE2_SPTR next;1803PCRE2_SPTR next_end;1804PCRE2_SPTR max_end;1805PCRE2_UCHAR type;1806sljit_sw length = end - begin;1807sljit_s32 min, max, i;18081809/* Detect fixed iterations first. */1810if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)1811return FALSE;18121813/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/1814* Skip the check of the second part. 
*/1815if (PRIVATE_DATA(end - LINK_SIZE) != 0)1816return TRUE;18171818next = end;1819min = 1;1820while (1)1821{1822if (*next != *begin)1823break;1824next_end = bracketend(next);1825if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)1826break;1827next = next_end;1828min++;1829}18301831if (min == 2)1832return FALSE;18331834max = 0;1835max_end = next;1836if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)1837{1838type = *next;1839while (1)1840{1841if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)1842break;1843next_end = bracketend(next + 2 + LINK_SIZE);1844if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)1845break;1846next = next_end;1847max++;1848}18491850if (next[0] == type && next[1] == *begin && max >= 1)1851{1852next_end = bracketend(next + 1);1853if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)1854{1855for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)1856if (*next_end != OP_KET)1857break;18581859if (i == max)1860{1861/* Patterns must fit into an int32 even for link-size=4. */1862common->private_data_ptrs[max_end - common->start - LINK_SIZE] = (sljit_s32)(next_end - max_end);1863common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;1864/* +2 the original and the last. 
*/1865common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;1866if (min == 1)1867return TRUE;1868min--;1869max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);1870}1871}1872}1873}18741875if (min >= 3)1876{1877common->private_data_ptrs[end - common->start - LINK_SIZE] = (sljit_s32)(max_end - end);1878common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;1879common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;1880return TRUE;1881}18821883return FALSE;1884}18851886#define CASE_ITERATOR_PRIVATE_DATA_1 \1887case OP_MINSTAR: \1888case OP_MINPLUS: \1889case OP_QUERY: \1890case OP_MINQUERY: \1891case OP_MINSTARI: \1892case OP_MINPLUSI: \1893case OP_QUERYI: \1894case OP_MINQUERYI: \1895case OP_NOTMINSTAR: \1896case OP_NOTMINPLUS: \1897case OP_NOTQUERY: \1898case OP_NOTMINQUERY: \1899case OP_NOTMINSTARI: \1900case OP_NOTMINPLUSI: \1901case OP_NOTQUERYI: \1902case OP_NOTMINQUERYI:19031904#define CASE_ITERATOR_PRIVATE_DATA_2A \1905case OP_STAR: \1906case OP_PLUS: \1907case OP_STARI: \1908case OP_PLUSI: \1909case OP_NOTSTAR: \1910case OP_NOTPLUS: \1911case OP_NOTSTARI: \1912case OP_NOTPLUSI:19131914#define CASE_ITERATOR_PRIVATE_DATA_2B \1915case OP_UPTO: \1916case OP_MINUPTO: \1917case OP_UPTOI: \1918case OP_MINUPTOI: \1919case OP_NOTUPTO: \1920case OP_NOTMINUPTO: \1921case OP_NOTUPTOI: \1922case OP_NOTMINUPTOI:19231924#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \1925case OP_TYPEMINSTAR: \1926case OP_TYPEMINPLUS: \1927case OP_TYPEQUERY: \1928case OP_TYPEMINQUERY:19291930#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \1931case OP_TYPESTAR: \1932case OP_TYPEPLUS:19331934#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \1935case OP_TYPEUPTO: \1936case OP_TYPEMINUPTO:19371938static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)1939{1940PCRE2_SPTR cc = common->start;1941PCRE2_SPTR alternative;1942PCRE2_SPTR end = NULL;1943int private_data_ptr = *private_data_start;1944int space, size, 
bracketlen;1945BOOL repeat_check = TRUE;19461947while (cc < ccend)1948{1949space = 0;1950size = 0;1951bracketlen = 0;1952if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)1953break;19541955/* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */1956if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))1957{1958if (detect_repeat(common, cc))1959{1960/* These brackets are converted to repeats, so no global1961based single character repeat is allowed. */1962if (cc >= end)1963end = bracketend(cc);1964}1965}1966repeat_check = TRUE;19671968switch(*cc)1969{1970case OP_KET:1971if (common->private_data_ptrs[cc + 1 - common->start] != 0)1972{1973common->private_data_ptrs[cc - common->start] = private_data_ptr;1974private_data_ptr += sizeof(sljit_sw);1975cc += common->private_data_ptrs[cc + 1 - common->start];1976}1977cc += 1 + LINK_SIZE;1978break;19791980case OP_ASSERT:1981case OP_ASSERT_NOT:1982case OP_ASSERTBACK:1983case OP_ASSERTBACK_NOT:1984case OP_ASSERT_NA:1985case OP_ONCE:1986case OP_SCRIPT_RUN:1987case OP_BRAPOS:1988case OP_SBRA:1989case OP_SBRAPOS:1990case OP_SCOND:1991common->private_data_ptrs[cc - common->start] = private_data_ptr;1992private_data_ptr += sizeof(sljit_sw);1993bracketlen = 1 + LINK_SIZE;1994break;19951996case OP_ASSERTBACK_NA:1997common->private_data_ptrs[cc - common->start] = private_data_ptr;1998private_data_ptr += sizeof(sljit_sw);19992000if (find_vreverse(cc))2001{2002common->private_data_ptrs[cc + 1 - common->start] = 1;2003private_data_ptr += sizeof(sljit_sw);2004}20052006bracketlen = 1 + LINK_SIZE;2007break;20082009case OP_ASSERT_SCS:2010common->private_data_ptrs[cc - common->start] = private_data_ptr;2011private_data_ptr += 2 * sizeof(sljit_sw);2012bracketlen = 1 + LINK_SIZE;2013break;20142015case OP_CBRAPOS:2016case OP_SCBRAPOS:2017common->private_data_ptrs[cc - common->start] = private_data_ptr;2018private_data_ptr += sizeof(sljit_sw);2019bracketlen = 1 + LINK_SIZE + 
IMM2_SIZE;2020break;20212022case OP_COND:2023/* Might be a hidden SCOND. */2024common->private_data_ptrs[cc - common->start] = 0;2025alternative = cc + GET(cc, 1);2026if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)2027{2028common->private_data_ptrs[cc - common->start] = private_data_ptr;2029private_data_ptr += sizeof(sljit_sw);2030}2031bracketlen = 1 + LINK_SIZE;2032break;20332034case OP_BRA:2035bracketlen = 1 + LINK_SIZE;2036break;20372038case OP_CBRA:2039case OP_SCBRA:2040bracketlen = 1 + LINK_SIZE + IMM2_SIZE;2041break;20422043case OP_BRAZERO:2044case OP_BRAMINZERO:2045case OP_BRAPOSZERO:2046size = 1;2047repeat_check = FALSE;2048break;20492050CASE_ITERATOR_PRIVATE_DATA_12051size = -2;2052space = 1;2053break;20542055CASE_ITERATOR_PRIVATE_DATA_2A2056size = -2;2057space = 2;2058break;20592060CASE_ITERATOR_PRIVATE_DATA_2B2061size = -(2 + IMM2_SIZE);2062space = 2;2063break;20642065CASE_ITERATOR_TYPE_PRIVATE_DATA_12066size = 1;2067space = 1;2068break;20692070CASE_ITERATOR_TYPE_PRIVATE_DATA_2A2071size = 1;2072if (cc[1] != OP_EXTUNI)2073space = 2;2074break;20752076case OP_TYPEUPTO:2077size = 1 + IMM2_SIZE;2078if (cc[1 + IMM2_SIZE] != OP_EXTUNI)2079space = 2;2080break;20812082case OP_TYPEMINUPTO:2083size = 1 + IMM2_SIZE;2084space = 2;2085break;20862087case OP_CLASS:2088case OP_NCLASS:2089size = 1 + 32 / sizeof(PCRE2_UCHAR);2090space = get_class_iterator_size(cc + size);2091break;20922093#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 82094case OP_XCLASS:2095case OP_ECLASS:2096size = GET(cc, 1);2097space = get_class_iterator_size(cc + size);2098break;2099#endif21002101default:2102cc = next_opcode(common, cc);2103SLJIT_ASSERT(cc != NULL);2104break;2105}21062107/* Character iterators, which are not inside a repeated bracket,2108gets a private slot instead of allocating it on the stack. 
*/2109if (space > 0 && cc >= end)2110{2111common->private_data_ptrs[cc - common->start] = private_data_ptr;2112private_data_ptr += sizeof(sljit_sw) * space;2113}21142115if (size != 0)2116{2117if (size < 0)2118{2119cc += -size;2120#ifdef SUPPORT_UNICODE2121if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);2122#endif2123}2124else2125cc += size;2126}21272128if (bracketlen > 0)2129{2130if (cc >= end)2131{2132end = bracketend(cc);2133if (end[-1 - LINK_SIZE] == OP_KET)2134end = NULL;2135}2136cc += bracketlen;2137}2138}2139*private_data_start = private_data_ptr;2140}21412142static SLJIT_INLINE BOOL is_cbracket_processed(compiler_common *common, sljit_s32 capture_index)2143{2144sljit_u8 bit = (sljit_u8)(1 << (capture_index & 0x7));2145sljit_u8 *ptr = common->cbracket_bitset + (capture_index >> 3);2146sljit_u8 value = *ptr;21472148*ptr |= bit;2149return (value & bit) != 0;2150}21512152/* Returns with a frame_types (always < 0) if no need for frame. */2153static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)2154{2155int length = 0;2156int possessive = 0;2157int offset;2158BOOL stack_restore = FALSE;2159BOOL setsom_found = recursive;2160BOOL setmark_found = recursive;2161/* The last capture is a local variable even for recursions. */2162BOOL capture_last_found = FALSE;21632164#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2165SLJIT_ASSERT(common->control_head_ptr != 0);2166*needs_control_head = TRUE;2167#else2168*needs_control_head = FALSE;2169#endif21702171memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);21722173if (ccend == NULL)2174{2175ccend = bracketend(cc) - (1 + LINK_SIZE);2176if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))2177{2178possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;2179/* This is correct regardless of common->capture_last_ptr. 
*/2180capture_last_found = TRUE;2181}2182cc = next_opcode(common, cc);2183}21842185SLJIT_ASSERT(cc != NULL);2186while (cc < ccend)2187switch(*cc)2188{2189case OP_SET_SOM:2190SLJIT_ASSERT(common->has_set_som);2191stack_restore = TRUE;2192if (!setsom_found)2193{2194length += 2;2195setsom_found = TRUE;2196}2197cc += 1;2198break;21992200case OP_MARK:2201case OP_COMMIT_ARG:2202case OP_PRUNE_ARG:2203case OP_THEN_ARG:2204SLJIT_ASSERT(common->mark_ptr != 0);2205stack_restore = TRUE;2206if (!setmark_found)2207{2208length += 2;2209setmark_found = TRUE;2210}2211if (common->control_head_ptr != 0)2212*needs_control_head = TRUE;2213cc += 1 + 2 + cc[1];2214break;22152216case OP_RECURSE:2217stack_restore = TRUE;2218if (common->has_set_som && !setsom_found)2219{2220length += 2;2221setsom_found = TRUE;2222}2223if (common->mark_ptr != 0 && !setmark_found)2224{2225length += 2;2226setmark_found = TRUE;2227}2228if (common->capture_last_ptr != 0 && !capture_last_found)2229{2230length += 2;2231capture_last_found = TRUE;2232}22332234cc += 1 + LINK_SIZE;2235while (*cc == OP_CREF)2236{2237offset = GET2(cc, 1);2238if (!is_cbracket_processed(common, offset))2239length += 3;2240cc += 1 + IMM2_SIZE;2241}2242break;22432244case OP_CBRA:2245case OP_CBRAPOS:2246case OP_SCBRA:2247case OP_SCBRAPOS:2248stack_restore = TRUE;2249if (common->capture_last_ptr != 0 && !capture_last_found)2250{2251length += 2;2252capture_last_found = TRUE;2253}22542255offset = GET2(cc, 1 + LINK_SIZE);2256if (!is_cbracket_processed(common, offset))2257length += 3;2258cc += 1 + LINK_SIZE + IMM2_SIZE;2259break;22602261case OP_THEN:2262stack_restore = TRUE;2263if (common->control_head_ptr != 0)2264*needs_control_head = TRUE;2265cc ++;2266break;22672268default:2269stack_restore = TRUE;2270PCRE2_FALLTHROUGH /* Fall through */22712272case OP_NOT_WORD_BOUNDARY:2273case OP_WORD_BOUNDARY:2274case OP_NOT_DIGIT:2275case OP_DIGIT:2276case OP_NOT_WHITESPACE:2277case OP_WHITESPACE:2278case OP_NOT_WORDCHAR:2279case OP_WORDCHAR:2280case 
OP_ANY:2281case OP_ALLANY:2282case OP_ANYBYTE:2283case OP_NOTPROP:2284case OP_PROP:2285case OP_ANYNL:2286case OP_NOT_HSPACE:2287case OP_HSPACE:2288case OP_NOT_VSPACE:2289case OP_VSPACE:2290case OP_EXTUNI:2291case OP_EODN:2292case OP_EOD:2293case OP_CIRC:2294case OP_CIRCM:2295case OP_DOLL:2296case OP_DOLLM:2297case OP_CHAR:2298case OP_CHARI:2299case OP_NOT:2300case OP_NOTI:23012302case OP_EXACT:2303case OP_POSSTAR:2304case OP_POSPLUS:2305case OP_POSQUERY:2306case OP_POSUPTO:23072308case OP_EXACTI:2309case OP_POSSTARI:2310case OP_POSPLUSI:2311case OP_POSQUERYI:2312case OP_POSUPTOI:23132314case OP_NOTEXACT:2315case OP_NOTPOSSTAR:2316case OP_NOTPOSPLUS:2317case OP_NOTPOSQUERY:2318case OP_NOTPOSUPTO:23192320case OP_NOTEXACTI:2321case OP_NOTPOSSTARI:2322case OP_NOTPOSPLUSI:2323case OP_NOTPOSQUERYI:2324case OP_NOTPOSUPTOI:23252326case OP_TYPEEXACT:2327case OP_TYPEPOSSTAR:2328case OP_TYPEPOSPLUS:2329case OP_TYPEPOSQUERY:2330case OP_TYPEPOSUPTO:23312332case OP_CLASS:2333case OP_NCLASS:2334case OP_XCLASS:2335case OP_ECLASS:23362337case OP_CALLOUT:2338case OP_CALLOUT_STR:23392340case OP_NOT_UCP_WORD_BOUNDARY:2341case OP_UCP_WORD_BOUNDARY:23422343cc = next_opcode(common, cc);2344SLJIT_ASSERT(cc != NULL);2345break;2346}23472348/* Possessive quantifiers can use a special case. */2349if (SLJIT_UNLIKELY(possessive == length))2350return stack_restore ? no_frame : no_stack;23512352if (length > 0)2353return length + 1;2354return stack_restore ? no_frame : no_stack;2355}23562357static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)2358{2359DEFINE_COMPILER;2360BOOL setsom_found = FALSE;2361BOOL setmark_found = FALSE;2362/* The last capture is a local variable even for recursions. 
*/2363BOOL capture_last_found = FALSE;2364int offset;23652366/* >= 1 + shortest item size (2) */2367SLJIT_UNUSED_ARG(stacktop);2368SLJIT_ASSERT(stackpos >= stacktop + 2);23692370memset(common->cbracket_bitset, 0, common->cbracket_bitset_length);23712372stackpos = STACK(stackpos);2373if (ccend == NULL)2374{2375ccend = bracketend(cc) - (1 + LINK_SIZE);2376if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)2377cc = next_opcode(common, cc);2378}23792380/* The data is restored by do_revertframes(). */2381SLJIT_ASSERT(cc != NULL);2382while (cc < ccend)2383switch(*cc)2384{2385case OP_SET_SOM:2386SLJIT_ASSERT(common->has_set_som);2387if (!setsom_found)2388{2389OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));2390OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));2391stackpos -= SSIZE_OF(sw);2392OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2393stackpos -= SSIZE_OF(sw);2394setsom_found = TRUE;2395}2396cc += 1;2397break;23982399case OP_MARK:2400case OP_COMMIT_ARG:2401case OP_PRUNE_ARG:2402case OP_THEN_ARG:2403SLJIT_ASSERT(common->mark_ptr != 0);2404if (!setmark_found)2405{2406OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);2407OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);2408stackpos -= SSIZE_OF(sw);2409OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2410stackpos -= SSIZE_OF(sw);2411setmark_found = TRUE;2412}2413cc += 1 + 2 + cc[1];2414break;24152416case OP_RECURSE:2417if (common->has_set_som && !setsom_found)2418{2419OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));2420OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));2421stackpos -= SSIZE_OF(sw);2422OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2423stackpos -= SSIZE_OF(sw);2424setsom_found = TRUE;2425}2426if (common->mark_ptr != 0 && !setmark_found)2427{2428OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);2429OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 
-common->mark_ptr);2430stackpos -= SSIZE_OF(sw);2431OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2432stackpos -= SSIZE_OF(sw);2433setmark_found = TRUE;2434}2435if (common->capture_last_ptr != 0 && !capture_last_found)2436{2437OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);2438OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);2439stackpos -= SSIZE_OF(sw);2440OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2441stackpos -= SSIZE_OF(sw);2442capture_last_found = TRUE;2443}2444cc += 1 + LINK_SIZE;2445while (*cc == OP_CREF)2446{2447offset = GET2(cc, 1);2448if (!is_cbracket_processed(common, offset))2449{2450offset <<= 1;2451OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));2452stackpos -= SSIZE_OF(sw);2453OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));2454OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));2455OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2456stackpos -= SSIZE_OF(sw);2457OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);2458stackpos -= SSIZE_OF(sw);2459}2460cc += 1 + IMM2_SIZE;2461}2462break;24632464case OP_CBRA:2465case OP_CBRAPOS:2466case OP_SCBRA:2467case OP_SCBRAPOS:2468if (common->capture_last_ptr != 0 && !capture_last_found)2469{2470OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);2471OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);2472stackpos -= SSIZE_OF(sw);2473OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);2474stackpos -= SSIZE_OF(sw);2475capture_last_found = TRUE;2476}24772478offset = GET2(cc, 1 + LINK_SIZE);2479if (!is_cbracket_processed(common, offset))2480{2481offset <<= 1;2482OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));2483stackpos -= SSIZE_OF(sw);2484OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));2485OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 
/* Number of rotating temporary registers used by the delayed memory copy. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a "delayed" word-by-word memory copy.

Every copy is split into a load, which is emitted immediately into one of
RECURSE_TMP_REG_COUNT rotating registers, and a store, which is emitted only
when the same register slot is needed again or when the copy is finished. */
typedef struct delayed_mem_copy_status {
  /* Compiler used for emitting the moves (set by delayed_mem_copy_init). */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot, or -1 if none is pending. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset of the pending store of each slot. */
  sljit_s32 store_offsets[RECURSE_TMP_REG_COUNT];
  /* Register which carries the value of each slot. Must be filled in by the
  caller before delayed_mem_copy_init is called. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Location where tmp_regs[i] is preserved while it is in use. Only used when
  sljit_get_register_index() reports a negative index for it, otherwise it must
  be the same as tmp_regs[i]. Must be filled in by the caller as well. */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot used by the next delayed_mem_copy_move call. */
  int next_tmp_reg;
} delayed_mem_copy_status;

/* Prepares a status structure whose tmp_regs / saved_tmp_regs arrays are
already filled in: all slots are marked as having no pending store, the
rotation starts at slot 0 and the compiler is taken from common. */
static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
{
int i;

for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
  SLJIT_ASSERT(status->tmp_regs[i] >= 0);
  SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);

  status->store_bases[i] = -1;
  }
status->next_tmp_reg = 0;
status->compiler = common->compiler;
}

/* Queues the copy of one machine word from [load_base + load_offset] to
[store_base + store_offset].

The load is emitted now, into the next register of the rotation. The store
is only recorded; it is emitted when this slot comes up again or when
delayed_mem_copy_finish is called. */
static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
  int store_base, sljit_s32 store_offset)
{
struct sljit_compiler *compiler = status->compiler;
int next_tmp_reg = status->next_tmp_reg;
int tmp_reg = status->tmp_regs[next_tmp_reg];

SLJIT_ASSERT(load_base > 0 && store_base > 0);

if (status->store_bases[next_tmp_reg] == -1)
  {
  /* First use of this slot: the current value of tmp_reg is moved into
  saved_tmp_regs[] when that location has no register index. */
  /* Preserve virtual registers. */
  if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
    OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
  }
else
  /* The slot is occupied: emit its pending store before reusing the register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
status->store_bases[next_tmp_reg] = store_base;
status->store_offsets[next_tmp_reg] = store_offset;

status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
}

/* Emits every store which is still pending, starting with the slot which
would be used next (so the stores follow the order of the moves), and moves
the saved value back into each used temporary register when it was saved by
delayed_mem_copy_move. */
static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
{
struct sljit_compiler *compiler = status->compiler;
int next_tmp_reg = status->next_tmp_reg;
int tmp_reg, saved_tmp_reg, i;

for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
  {
  if (status->store_bases[next_tmp_reg] != -1)
    {
    tmp_reg = status->tmp_regs[next_tmp_reg];
    saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];

    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);

    /* Restore virtual registers. */
    if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)
      OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
    }

  next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
  }
}

#undef RECURSE_TMP_REG_COUNT
*/2572if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)2573OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);2574}25752576next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;2577}2578}25792580#undef RECURSE_TMP_REG_COUNT25812582static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)2583{2584uint8_t *byte;2585uint8_t mask;25862587SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);25882589bit_index >>= SLJIT_WORD_SHIFT;25902591SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);25922593mask = 1 << (bit_index & 0x7);2594byte = common->recurse_bitset + (bit_index >> 3);25952596if (*byte & mask)2597return FALSE;25982599*byte |= mask;2600return TRUE;2601}26022603enum get_recurse_flags {2604recurse_flag_quit_found = (1 << 0),2605recurse_flag_accept_found = (1 << 1),2606recurse_flag_setsom_found = (1 << 2),2607recurse_flag_setmark_found = (1 << 3),2608recurse_flag_control_head_found = (1 << 4),2609recurse_flag_recurse_arg = (1 << 5),2610};26112612static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)2613{2614int length = 1;2615int size, offset;2616PCRE2_SPTR alternative, cref;2617uint32_t recurse_flags = 0;26182619memset(common->recurse_bitset, 0, common->recurse_bitset_size);26202621if (common->currententry->arg_size > 0)2622{2623cref = common->currententry->arg_start;26242625do2626{2627offset = GET2(cref, 1);2628recurse_check_bit(common, OVECTOR(offset << 1));2629cref += 1 + IMM2_SIZE;2630}2631while (*cref == OP_CREF);2632}26332634#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD2635SLJIT_ASSERT(common->control_head_ptr != 0);2636recurse_flags |= recurse_flag_control_head_found;2637#endif26382639/* Calculate the sum of the private machine words. 
/* Computes the number of machine words which have to be saved for the
recursion whose body is [cc, ccend).

The opcodes of the body are scanned one by one. Every private data slot,
ovector pair and special pointer (capture_last_ptr) is counted only once:
recurse_check_bit() returns TRUE only for the first occurrence of an
offset. The capture pairs listed at common->currententry->arg_start (when
arg_size > 0) are marked in advance, so they are never counted.

The count starts at 1; copy_recurse_data uses that first word for
common->recursive_head_ptr.

Arguments:
  common        compiler state (recurse_bitset is cleared and reused)
  cc            first opcode of the scanned range
  ccend         end of the scanned range
  result_flags  receives the collected get_recurse_flags bits

Returns the number of machine words. */
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int length = 1;
int size, offset;
PCRE2_SPTR alternative, cref;
uint32_t recurse_flags = 0;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

if (common->currententry->arg_size > 0)
  {
  cref = common->currententry->arg_start;

  /* Mark the listed capture pairs, the return value is intentionally ignored. */
  do
    {
    offset = GET2(cref, 1);
    recurse_check_bit(common, OVECTOR(offset << 1));
    cref += 1 + IMM2_SIZE;
    }
  while (*cref == OP_CREF);
  }

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_flags |= recurse_flag_control_head_found;
#endif

/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    recurse_flags |= recurse_flag_setsom_found;
    cc += 1;
    break;

    case OP_RECURSE:
    if (common->has_set_som)
      recurse_flags |= recurse_flag_setsom_found;
    if (common->mark_ptr != 0)
      recurse_flags |= recurse_flag_setmark_found;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE;
    /* The OP_CREF items which directly follow are handled by the OP_CREF case. */
    if (*cc == OP_CREF)
      recurse_flags |= recurse_flag_recurse_arg;
    break;

    case OP_KET:
    offset = PRIVATE_DATA(cc);
    if (offset != 0)
      {
      if (recurse_check_bit(common, offset))
        length++;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    /* Only counted while the list following an OP_RECURSE is being processed. */
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
      {
      offset = GET2(cc, 1);
      if (recurse_check_bit(common, OVECTOR(offset << 1)))
        {
        /* NOTE: the second word is marked inside an assertion only. */
        SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
        length += 2;
        }

      if (cc[1 + IMM2_SIZE] != OP_CREF)
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_SCS:
    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length += 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* Two words for the ovector pair. */
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (!is_optimized_cbracket(common, offset) && recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    if (recurse_check_bit(common, OVECTOR(offset << 1)))
      {
      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
      length += 2;
      }
    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
      length++;
    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
      length++;
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      {
      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
      length += 2;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    size = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif

    /* The number of words depends on the iterator which follows the class. */
    offset = PRIVATE_DATA(cc);
    if (offset != 0 && recurse_check_bit(common, offset))
      length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    recurse_flags |= recurse_flag_setmark_found;
    if (common->control_head_ptr != 0)
      recurse_flags |= recurse_flag_control_head_found;
    if (*cc != OP_MARK)
      recurse_flags |= recurse_flag_quit_found;

    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    recurse_flags |= recurse_flag_quit_found;
    cc++;
    break;

    case OP_SKIP_ARG:
    recurse_flags |= recurse_flag_quit_found;
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
    cc++;
    break;

    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    recurse_flags |= recurse_flag_accept_found;
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);

/* One word for the control head, if it was seen. */
if (recurse_flags & recurse_flag_control_head_found)
  length++;
/* Start of match and mark are counted only when a quit verb was found. */
if (recurse_flags & recurse_flag_quit_found)
  {
  if (recurse_flags & recurse_flag_setsom_found)
    length++;
  if (recurse_flags & recurse_flag_setmark_found)
    length++;
  }

*result_flags = recurse_flags;
return length;
}
/* Copy modes of copy_recurse_data. Three groups of words are distinguished
there: private, shared and kept shared words. */
enum copy_recurse_data_types {
  /* Local (SLJIT_SP based) values are copied to the stack: all three groups. */
  recurse_copy_from_global,
  /* Stack values are copied back to the locals: private words only. */
  recurse_copy_private_to_global,
  /* Stack values are copied back to the locals: shared and kept shared words. */
  recurse_copy_shared_to_global,
  /* Stack values are copied back to the locals: kept shared words only. */
  recurse_copy_kept_shared_to_global,
  /* Locals and the saved values (addressed through TMP2) are exchanged:
  private and shared words. */
  recurse_swap_global
};
/* Emits the code which copies the data of a recursion between the local
(SLJIT_SP based) slots and a save area addressed through STACK_TOP (or TMP2
for recurse_swap_global).

The opcodes in [cc, ccend) are scanned and for each of them the offsets of
its private, shared and kept shared words are collected. recurse_check_bit()
ensures that an offset is processed only once. Which groups are really
copied depends on type (see enum copy_recurse_data_types); the groups which
are not copied still advance stackptr, so every word always has the same
position in the save area.

Arguments:
  common         compiler state (recurse_bitset is cleared and reused)
  cc             first opcode of the scanned range
  ccend          end of the scanned range
  type           one of copy_recurse_data_types
  stackptr       index of the first word of the save area (converted by STACK())
  stacktop       index of the end of the save area (converted by STACK())
  recurse_flags  flags computed by get_recurse_data_length */
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
  int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative, cref;
sljit_sw private_srcw[2];
sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;

memset(common->recurse_bitset, 0, common->recurse_bitset_size);

if (common->currententry->arg_size > 0)
  {
  cref = common->currententry->arg_start;

  /* Mark the listed capture pairs as already processed, so they are not copied. */
  do
    {
    offset = GET2(cref, 1);
    recurse_check_bit(common, OVECTOR(offset << 1));
    cref += 1 + IMM2_SIZE;
    }
  while (*cref == OP_CREF);
  }

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
recurse_check_bit(common, common->control_head_ptr);
#endif

/* from_sp: the locals are the source. base_reg: register addressing the save area. */
switch (type)
  {
  case recurse_copy_from_global:
  from_sp = TRUE;
  base_reg = STACK_TOP;
  break;

  case recurse_copy_private_to_global:
  case recurse_copy_shared_to_global:
  case recurse_copy_kept_shared_to_global:
  from_sp = FALSE;
  base_reg = STACK_TOP;
  break;

  default:
  SLJIT_ASSERT(type == recurse_swap_global);
  from_sp = FALSE;
  base_reg = TMP2;
  break;
  }

stackptr = STACK(stackptr);
stacktop = STACK(stacktop);

/* Select the three rotating registers of the delayed copy. TMP2 cannot be
used as a temporary when it is the base register. */
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;

if (base_reg != TMP2)
  {
  status.tmp_regs[1] = TMP2;
  status.saved_tmp_regs[1] = TMP2;
  }
else
  {
  status.saved_tmp_regs[1] = RETURN_ADDR;
  if (HAS_VIRTUAL_REGISTERS)
    status.tmp_regs[1] = STR_PTR;
  else
    status.tmp_regs[1] = RETURN_ADDR;
  }

status.saved_tmp_regs[2] = TMP3;
if (HAS_VIRTUAL_REGISTERS)
  status.tmp_regs[2] = STR_END;
else
  status.tmp_regs[2] = TMP3;

delayed_mem_copy_init(&status, common);

/* The first word of the save area belongs to recursive_head_ptr. */
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
  {
  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
if (type != recurse_copy_shared_to_global)
  {
  if (!from_sp)
    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);

  if (from_sp || type == recurse_swap_global)
    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
  }

stackptr += sizeof(sljit_sw);
#endif

while (cc < ccend)
  {
  /* Collect the words of the current opcode. */
  private_count = 0;
  shared_count = 0;
  kept_shared_count = 0;

  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
      {
      kept_shared_srcw[0] = OVECTOR(0);
      kept_shared_count = 1;
      }
    cc += 1;
    break;

    case OP_RECURSE:
    if (recurse_flags & recurse_flag_quit_found)
      {
      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
        {
        kept_shared_srcw[0] = OVECTOR(0);
        kept_shared_count = 1;
        }
      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
        {
        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
        kept_shared_count++;
        }
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[0] = common->capture_last_ptr;
      shared_count = 1;
      }

    cc += 1 + LINK_SIZE;
    /* The OP_CREF items which directly follow are handled by the OP_CREF case. */
    if (*cc == OP_CREF)
      recurse_flags |= recurse_flag_recurse_arg;
    break;

    case OP_KET:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0)
      {
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
      cc += PRIVATE_DATA(cc + 1);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    /* Only processed while the list following an OP_RECURSE is being scanned. */
    if ((recurse_flags & recurse_flag_recurse_arg) != 0)
      {
      offset = GET2(cc, 1);
      shared_srcw[0] = OVECTOR(offset << 1);
      if (recurse_check_bit(common, shared_srcw[0]))
        {
        shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
        SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
        shared_count = 2;
        }

      if (cc[1 + IMM2_SIZE] != OP_CREF)
        recurse_flags &= ~(uint32_t)recurse_flag_recurse_arg;
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_ASSERT_SCS:
    private_srcw[0] = PRIVATE_DATA(cc);
    private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 2;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    offset = GET2(cc, 1 + LINK_SIZE);
    /* The ovector pair is shared data. */
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    if (!is_optimized_cbracket(common, offset))
      {
      private_srcw[0] = OVECTOR_PRIV(offset);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    offset = GET2(cc, 1 + LINK_SIZE);
    shared_srcw[0] = OVECTOR(offset << 1);
    if (recurse_check_bit(common, shared_srcw[0]))
      {
      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
      shared_count = 2;
      }

    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
      {
      shared_srcw[shared_count] = common->capture_last_ptr;
      shared_count++;
      }

    private_srcw[0] = PRIVATE_DATA(cc);
    if (recurse_check_bit(common, private_srcw[0]))
      private_count = 1;

    /* From here offset is a local offset, not a capture index. */
    offset = OVECTOR_PRIV(offset);
    if (recurse_check_bit(common, offset))
      {
      private_srcw[private_count] = offset;
      private_count++;
      }
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      private_srcw[0] = PRIVATE_DATA(cc);
      if (recurse_check_bit(common, private_srcw[0]))
        private_count = 1;
      }
    cc += 1 + LINK_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      private_count = 1;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    private_srcw[0] = PRIVATE_DATA(cc);
    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
      {
      private_count = 2;
      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
      }
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    case OP_ECLASS:
    i = (*cc >= OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#else
    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
    /* NOTE(review): unlike get_recurse_data_length, one word is always
    taken here when the private data is non-zero, recurse_check_bit() is
    consulted only for the two word case. This presumably relies on each
    class opcode having its own private data slot - confirm. */
    if (PRIVATE_DATA(cc) != 0)
      {
      private_count = 1;
      private_srcw[0] = PRIVATE_DATA(cc);
      switch(get_class_iterator_size(cc + i))
        {
        case 1:
        break;

        case 2:
        if (recurse_check_bit(common, private_srcw[0]))
          {
          private_count = 2;
          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
          }
        break;

        default:
        SLJIT_UNREACHABLE();
        break;
        }
      }
    cc += i;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
      {
      kept_shared_srcw[0] = common->mark_ptr;
      kept_shared_count = 1;
      }
    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    SLJIT_ASSERT(common->control_head_ptr != 0);
    if (recurse_check_bit(common, common->control_head_ptr))
      {
      private_srcw[0] = common->control_head_ptr;
      private_count = 1;
      }
    cc++;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    /* Nothing was collected, skip the copy section below. */
    continue;
    }

  /* Private words. */
  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);

    for (i = 0; i < private_count; i++)
      {
      SLJIT_ASSERT(private_srcw[i] != 0);

      /* For recurse_swap_global both moves are queued, which exchanges
      the two values because the stores are delayed. */
      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)private_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * private_count;

  /* Shared words. */
  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);

    for (i = 0; i < shared_count; i++)
      {
      SLJIT_ASSERT(shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * shared_count;

  /* Kept shared words. */
  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
    {
    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);

    for (i = 0; i < kept_shared_count; i++)
      {
      SLJIT_ASSERT(kept_shared_srcw[i] != 0);

      if (!from_sp)
        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, (sljit_s32)kept_shared_srcw[i]);

      if (from_sp || type == recurse_swap_global)
        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);

      stackptr += sizeof(sljit_sw);
      }
    }
  else
    stackptr += sizeof(sljit_sw) * kept_shared_count;
  }

/* The whole save area must have been consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop);

delayed_mem_copy_finish(&status);
}
/* Scans the bracket starting at cc (nested brackets are processed
recursively) and stores 1 into the common->then_offsets entry which is
currently tracked whenever an opcode in the OP_THEN .. OP_THEN_ARG range
is found.

The tracked entry (current_offset) is inherited from the caller. When the
bracket is treated as having alternatives, it is replaced by the entry
which belongs to the start of the current alternative; when it is NULL,
nothing is recorded.

Returns the position which follows the bracket, including the repeat data
which may follow its closing OP_KET. */
static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
{
PCRE2_SPTR end = bracketend(cc);
BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;

/* Assert captures *THEN verb even if it has no alternatives. */
if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
  current_offset = NULL;
else if (*cc >= OP_ASSERT_NA && *cc <= OP_ASSERT_SCS)
  has_alternatives = TRUE;
/* Conditional block does never capture. */
else if (*cc == OP_COND || *cc == OP_SCOND)
  has_alternatives = FALSE;

cc = next_opcode(common, cc);

if (has_alternatives)
  {
  /* Skip the opcode which may directly follow the bracket start. */
  switch (*cc)
    {
    case OP_REVERSE:
    case OP_CREF:
    cc += 1 + IMM2_SIZE;
    break;
    case OP_VREVERSE:
    case OP_DNCREF:
    cc += 1 + 2 * IMM2_SIZE;
    break;
    }

  current_offset = common->then_offsets + (cc - common->start);
  }

while (cc < end)
  {
  /* Nested bracket: the recursive call returns the position after it. */
  if (*cc >= OP_ASSERT && *cc <= OP_SCOND)
    {
    cc = set_then_offsets(common, cc, current_offset);
    continue;
    }

  /* A new alternative starts: track the entry of its first opcode. */
  if (*cc == OP_ALT && has_alternatives)
    {
    cc += 1 + LINK_SIZE;

    if (*cc == OP_REVERSE)
      cc += 1 + IMM2_SIZE;
    else if (*cc == OP_VREVERSE)
      cc += 1 + 2 * IMM2_SIZE;

    current_offset = common->then_offsets + (cc - common->start);
    continue;
    }

  if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
    *current_offset = 1;
  cc = next_opcode(common, cc);
  }

/* cc points to the closing opcode of the bracket. */
cc = end - 1 - LINK_SIZE;

/* Ignore repeats. */
if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)
  end += PRIVATE_DATA(cc + 1);

return end;
}
*/3442if (*cc == OP_KET && PRIVATE_DATA(cc) != 0)3443end += PRIVATE_DATA(cc + 1);34443445return end;3446}34473448#undef CASE_ITERATOR_PRIVATE_DATA_13449#undef CASE_ITERATOR_PRIVATE_DATA_2A3450#undef CASE_ITERATOR_PRIVATE_DATA_2B3451#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_13452#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A3453#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B34543455static SLJIT_INLINE BOOL is_powerof2(unsigned int value)3456{3457return (value & (value - 1)) == 0;3458}34593460static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)3461{3462while (list != NULL)3463{3464/* sljit_set_label is clever enough to do nothing3465if either the jump or the label is NULL. */3466SET_LABEL(list->jump, label);3467list = list->next;3468}3469}34703471static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)3472{3473jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));3474if (list_item)3475{3476list_item->next = *list;3477list_item->jump = jump;3478*list = list_item;3479}3480}34813482static void add_stub(compiler_common *common, struct sljit_jump *start)3483{3484DEFINE_COMPILER;3485stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));34863487if (list_item)3488{3489list_item->start = start;3490list_item->quit = LABEL();3491list_item->next = common->stubs;3492common->stubs = list_item;3493}3494}34953496static void flush_stubs(compiler_common *common)3497{3498DEFINE_COMPILER;3499stub_list *list_item = common->stubs;35003501while (list_item)3502{3503JUMPHERE(list_item->start);3504add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));3505JUMPTO(SLJIT_JUMP, list_item->quit);3506list_item = list_item->next;3507}3508common->stubs = NULL;3509}35103511static SLJIT_INLINE void count_match(compiler_common *common)3512{3513DEFINE_COMPILER;35143515OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);3516add_jump(compiler, &common->calllimit, 
JUMP(SLJIT_ZERO));3517}35183519static SLJIT_INLINE void allocate_stack(compiler_common *common, sljit_s32 size)3520{3521/* May destroy all locals and registers except TMP2. */3522DEFINE_COMPILER;35233524SLJIT_ASSERT(size > 0);3525OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));3526#ifdef DESTROY_REGISTERS3527OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);3528OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);3529OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);3530#if defined SLJIT_DEBUG && SLJIT_DEBUG3531SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));3532/* These two are also used by the stackalloc calls. */3533OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP1, 0);3534OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP1, 0);3535#endif3536#endif3537add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));3538}35393540static SLJIT_INLINE void free_stack(compiler_common *common, sljit_s32 size)3541{3542DEFINE_COMPILER;35433544SLJIT_ASSERT(size > 0);3545OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));3546}35473548static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)3549{3550DEFINE_COMPILER;3551sljit_uw *result;35523553if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))3554return NULL;35553556result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);3557if (SLJIT_UNLIKELY(result == NULL))3558{3559sljit_set_compiler_memory_error(compiler);3560return NULL;3561}35623563*(void**)result = common->read_only_data_head;3564common->read_only_data_head = (void *)result;3565return result + 1;3566}35673568static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)3569{3570DEFINE_COMPILER;3571struct sljit_label *loop;3572sljit_s32 i;35733574/* At this point we can freely use all temporary registers. */3575SLJIT_ASSERT(length > 1);3576/* TMP1 returns with begin - 1. 
/* Emits the code which sets OVECTOR(1) .. OVECTOR(length - 1) to the
"begin - 1" value computed from jit_arguments.begin. Short vectors are
filled by individual stores, longer ones by a loop. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
sljit_s32 i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  /* Unrolled stores. */
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call only checks whether the store with pre-update
  addressing form is available. */
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* Pre-update form: the base starts one word before the first store. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Generic form: store, then advance the pointer explicitly. */
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }
}
/* Emits the code which stores zero into every machine word of the local
area [common->early_fail_start_ptr, common->early_fail_end_ptr). */
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
DEFINE_COMPILER;
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 uncleared_size;
/* Source operand of the stores: immediate zero by default. */
sljit_s32 src = SLJIT_IMM;
sljit_s32 i;
struct sljit_label *loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

/* A single word. */
if (size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Keep the zero in TMP3 when TMP3 has a register index and the CPU does not
report the zero register feature. */
if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  src = TMP3;
  }

/* Up to six words: unrolled stores. */
if (size <= 6 * sizeof(sljit_sw))
  {
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
  return;
  }

GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* The loop below clears three words per iteration, the remaining
zero, one or two words are cleared after the loop. */
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 is the end address of the part cleared by the loop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);

loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
/* TMP1 is already advanced, so the 2nd and 3rd words have negative offsets. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);

if (uncleared_size >= sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);

if (uncleared_size >= 2 * sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
}
*/3665if (length > 2)3666OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));3667if (length < 8)3668{3669for (i = 2; i < length; i++)3670OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);3671}3672else3673{3674if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)3675{3676GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));3677OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);3678loop = LABEL();3679sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));3680OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);3681JUMPTO(SLJIT_NOT_ZERO, loop);3682}3683else3684{3685GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));3686OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);3687loop = LABEL();3688OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);3689OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));3690OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);3691JUMPTO(SLJIT_NOT_ZERO, loop);3692}3693}36943695if (!HAS_VIRTUAL_REGISTERS)3696OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));3697else3698OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);36993700if (common->mark_ptr != 0)3701OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);3702if (common->control_head_ptr != 0)3703OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);3704if (HAS_VIRTUAL_REGISTERS)3705OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));37063707OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3708OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));3709}37103711static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)3712{3713while (current != NULL)3714{3715switch 
(current[1])3716{3717case type_then_trap:3718break;37193720case type_mark:3721if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)3722return current[3];3723break;37243725default:3726SLJIT_UNREACHABLE();3727break;3728}3729SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);3730current = (sljit_sw*)current[0];3731}3732return 0;3733}37343735static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)3736{3737DEFINE_COMPILER;3738struct sljit_label *loop;3739BOOL has_pre;37403741/* At this point we can freely use all registers. */3742OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));3743OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);37443745if (HAS_VIRTUAL_REGISTERS)3746{3747OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);3748OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3749if (common->mark_ptr != 0)3750OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);3751OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));3752OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);3753if (common->mark_ptr != 0)3754OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);3755OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),3756SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));3757}3758else3759{3760OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);3761OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));3762if (common->mark_ptr != 0)3763OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);3764OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));3765OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);3766if (common->mark_ptr != 0)3767OP1(SLJIT_MOV, 
SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);3768OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));3769}37703771has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;37723773GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));3774OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));37753776loop = LABEL();37773778if (has_pre)3779sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));3780else3781{3782OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);3783OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));3784}37853786OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));3787OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);3788/* Copy the integer value to the output buffer */3789#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323790OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);3791#endif37923793SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);3794OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);37953796OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3797JUMPTO(SLJIT_NOT_ZERO, loop);37983799/* Calculate the return value, which is the maximum ovector value. */3800if (topbracket > 1)3801{3802if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)3803{3804GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));3805OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);38063807/* OVECTOR(0) is never equal to SLJIT_S2. 
*/3808loop = LABEL();3809sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));3810OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3811CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);3812OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);3813}3814else3815{3816GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));3817OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);38183819/* OVECTOR(0) is never equal to SLJIT_S2. */3820loop = LABEL();3821OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);3822OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));3823OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);3824CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);3825OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);3826}3827}3828else3829OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);3830}38313832static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)3833{3834DEFINE_COMPILER;3835sljit_s32 mov_opcode;3836sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;38373838SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);3839SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 03840&& (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));38413842if (arguments_reg != ARGUMENTS)3843OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);3844OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),3845common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);3846OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);38473848/* Store match begin and end. 
*/3849OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));3850OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);3851OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));38523853mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;38543855OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);3856#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323857OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);3858#endif3859OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);38603861OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);3862#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 323863OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);3864#endif3865OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);38663867JUMPTO(SLJIT_JUMP, quit);3868}38693870static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)3871{3872/* May destroy TMP1. */3873DEFINE_COMPILER;3874struct sljit_jump *jump;38753876if (common->mode == PCRE2_JIT_PARTIAL_SOFT)3877{3878/* The value of -1 must be kept for start_used_ptr! */3879OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);3880/* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting3881is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. 
*/3882jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);3883OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3884JUMPHERE(jump);3885}3886else if (common->mode == PCRE2_JIT_PARTIAL_HARD)3887{3888jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3889OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);3890JUMPHERE(jump);3891}3892}38933894static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)3895{3896/* Detects if the character has an othercase. */3897unsigned int c;38983899#ifdef SUPPORT_UNICODE3900if (common->utf || common->ucp)3901{3902if (common->utf)3903{3904GETCHAR(c, cc);3905}3906else3907c = *cc;39083909if (c > 127)3910return c != UCD_OTHERCASE(c);39113912return common->fcc[c] != c;3913}3914else3915#endif3916c = *cc;3917return MAX_255(c) ? common->fcc[c] != c : FALSE;3918}39193920static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)3921{3922/* Returns with the othercase. */3923#ifdef SUPPORT_UNICODE3924if ((common->utf || common->ucp) && c > 127)3925return UCD_OTHERCASE(c);3926#endif3927return TABLE_GET(c, common->fcc, c);3928}39293930static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)3931{3932/* Detects if the character and its othercase has only 1 bit difference. */3933unsigned int c, oc, bit;3934#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 83935int n;3936#endif39373938#ifdef SUPPORT_UNICODE3939if (common->utf || common->ucp)3940{3941if (common->utf)3942{3943GETCHAR(c, cc);3944}3945else3946c = *cc;39473948if (c <= 127)3949oc = common->fcc[c];3950else3951oc = UCD_OTHERCASE(c);3952}3953else3954{3955c = *cc;3956oc = TABLE_GET(c, common->fcc, c);3957}3958#else3959c = *cc;3960oc = TABLE_GET(c, common->fcc, c);3961#endif39623963SLJIT_ASSERT(c != oc);39643965bit = c ^ oc;39663967#ifndef EBCDIC3968/* Optimized for English alphabet. 
*/3969if (c <= 127 && bit == 0x20)3970return (0 << 8) | 0x20;3971#endif39723973/* Since c != oc, they must have at least 1 bit difference. */3974if (!is_powerof2(bit))3975return 0;39763977#if PCRE2_CODE_UNIT_WIDTH == 839783979#ifdef SUPPORT_UNICODE3980if (common->utf && c > 127)3981{3982n = GET_EXTRALEN(*cc);3983while ((bit & 0x3f) == 0)3984{3985n--;3986bit >>= 6;3987}3988return (n << 8) | bit;3989}3990#endif /* SUPPORT_UNICODE */3991return (0 << 8) | bit;39923993#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3239943995#ifdef SUPPORT_UNICODE3996if (common->utf && c > 65535)3997{3998if (bit >= (1u << 10))3999bit >>= 10;4000else4001return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));4002}4003#endif /* SUPPORT_UNICODE */4004return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));40054006#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4007}40084009static void check_partial(compiler_common *common, BOOL force)4010{4011/* Checks whether a partial matching is occurred. Does not modify registers. */4012DEFINE_COMPILER;4013struct sljit_jump *jump = NULL;40144015SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);40164017if (common->mode == PCRE2_JIT_COMPLETE)4018return;40194020if (!force && !common->allow_empty_partial)4021jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);4022else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4023jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);40244025if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4026OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);4027else4028{4029if (common->partialmatchlabel != NULL)4030JUMPTO(SLJIT_JUMP, common->partialmatchlabel);4031else4032add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));4033}40344035if (jump != NULL)4036JUMPHERE(jump);4037}40384039static void check_str_end(compiler_common *common, jump_list **end_reached)4040{4041/* Does not affect registers. 
Usually used in a tight spot. */4042DEFINE_COMPILER;4043struct sljit_jump *jump;40444045if (common->mode == PCRE2_JIT_COMPLETE)4046{4047add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));4048return;4049}40504051jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);4052if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4053{4054add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));4055OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);4056add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));4057}4058else4059{4060add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));4061if (common->partialmatchlabel != NULL)4062JUMPTO(SLJIT_JUMP, common->partialmatchlabel);4063else4064add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));4065}4066JUMPHERE(jump);4067}40684069static void detect_partial_match(compiler_common *common, jump_list **backtracks)4070{4071DEFINE_COMPILER;4072struct sljit_jump *jump;40734074if (common->mode == PCRE2_JIT_COMPLETE)4075{4076add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));4077return;4078}40794080/* Partial matching mode. 
*/4081jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);4082if (!common->allow_empty_partial)4083add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));4084else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4085add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));40864087if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4088{4089OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);4090add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));4091}4092else4093{4094if (common->partialmatchlabel != NULL)4095JUMPTO(SLJIT_JUMP, common->partialmatchlabel);4096else4097add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));4098}4099JUMPHERE(jump);4100}41014102static void process_partial_match(compiler_common *common)4103{4104DEFINE_COMPILER;4105struct sljit_jump *jump;41064107/* Partial matching mode. */4108if (common->mode == PCRE2_JIT_PARTIAL_SOFT)4109{4110jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);4111OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);4112JUMPHERE(jump);4113}4114else if (common->mode == PCRE2_JIT_PARTIAL_HARD)4115{4116if (common->partialmatchlabel != NULL)4117CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);4118else4119add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));4120}4121}41224123static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)4124{4125DEFINE_COMPILER;41264127CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);4128process_partial_match(common);4129}41304131static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)4132{4133/* Reads the character into TMP1, keeps STR_PTR.4134Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. 
*/4135DEFINE_COMPILER;4136#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324137struct sljit_jump *jump;4138#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */41394140SLJIT_UNUSED_ARG(max);4141SLJIT_UNUSED_ARG(dst);4142SLJIT_UNUSED_ARG(dstw);4143SLJIT_UNUSED_ARG(backtracks);41444145OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));41464147#ifdef SUPPORT_UNICODE4148#if PCRE2_CODE_UNIT_WIDTH == 84149if (common->utf)4150{4151if (max < 128) return;41524153jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4154OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);4155OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4156add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));4157OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);4158if (backtracks && common->invalid_utf)4159add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4160JUMPHERE(jump);4161}4162#elif PCRE2_CODE_UNIT_WIDTH == 164163if (common->utf)4164{4165if (max < 0xd800) return;41664167OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);41684169if (common->invalid_utf)4170{4171jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4172OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);4173OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4174add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4175OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);4176if (backtracks && common->invalid_utf)4177add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4178}4179else4180{4181/* TMP2 contains the high surrogate. 
*/4182jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);4183OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4184OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);4185OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);4186OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4187}41884189JUMPHERE(jump);4190}4191#elif PCRE2_CODE_UNIT_WIDTH == 324192if (common->invalid_utf)4193{4194if (max < 0xd800) return;41954196if (backtracks != NULL)4197{4198OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4199add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4200add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4201}4202else4203{4204OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4205OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);4206SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4207OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4208SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4209}4210}4211#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4212#endif /* SUPPORT_UNICODE */4213}42144215static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)4216{4217/* Reads one character back without moving STR_PTR. TMP2 must4218contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. 
*/4219DEFINE_COMPILER;42204221#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324222struct sljit_jump *jump;4223#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */42244225SLJIT_UNUSED_ARG(max);4226SLJIT_UNUSED_ARG(backtracks);42274228OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));42294230#ifdef SUPPORT_UNICODE4231#if PCRE2_CODE_UNIT_WIDTH == 84232if (common->utf)4233{4234if (max < 128) return;42354236jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4237if (common->invalid_utf)4238{4239add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));4240if (backtracks != NULL)4241add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4242}4243else4244add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));4245JUMPHERE(jump);4246}4247#elif PCRE2_CODE_UNIT_WIDTH == 164248if (common->utf)4249{4250if (max < 0xd800) return;42514252if (common->invalid_utf)4253{4254jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);4255add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));4256if (backtracks != NULL)4257add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4258}4259else4260{4261OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);4262jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);4263/* TMP2 contains the low surrogate. 
*/4264OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));4265OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);4266OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4267OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);4268OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4269}4270JUMPHERE(jump);4271}4272#elif PCRE2_CODE_UNIT_WIDTH == 324273if (common->invalid_utf)4274{4275OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4276add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4277add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4278}4279#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4280#endif /* SUPPORT_UNICODE */4281}42824283#define READ_CHAR_UPDATE_STR_PTR 0x14284#define READ_CHAR_UTF8_NEWLINE 0x24285#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)4286#define READ_CHAR_VALID_UTF 0x442874288static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,4289jump_list **backtracks, sljit_u32 options)4290{4291/* Reads the precise value of a character into TMP1, if the character is4292between min and max (c >= min && c <= max). Otherwise it returns with a value4293outside the range. Does not check STR_END. 
*/4294DEFINE_COMPILER;4295#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324296struct sljit_jump *jump;4297#endif4298#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84299struct sljit_jump *jump2;4300#endif43014302SLJIT_UNUSED_ARG(min);4303SLJIT_UNUSED_ARG(max);4304SLJIT_UNUSED_ARG(backtracks);4305SLJIT_UNUSED_ARG(options);4306SLJIT_ASSERT(min <= max);43074308OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4309OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));43104311#ifdef SUPPORT_UNICODE4312#if PCRE2_CODE_UNIT_WIDTH == 84313if (common->utf)4314{4315if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;43164317if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))4318{4319jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);43204321if (options & READ_CHAR_UTF8_NEWLINE)4322add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));4323else4324add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));43254326if (backtracks != NULL)4327add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4328JUMPHERE(jump);4329return;4330}43314332jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);4333if (min >= 0x10000)4334{4335OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);4336if (options & READ_CHAR_UPDATE_STR_PTR)4337OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4338OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4339jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);4340OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4341OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4342OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4343OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4344OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4345OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4346OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4347OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));4348if (!(options & 
READ_CHAR_UPDATE_STR_PTR))4349OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));4350OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4351OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4352OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4353JUMPHERE(jump2);4354if (options & READ_CHAR_UPDATE_STR_PTR)4355OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4356}4357else if (min >= 0x800 && max <= 0xffff)4358{4359OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);4360if (options & READ_CHAR_UPDATE_STR_PTR)4361OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4362OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4363jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);4364OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4365OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4366OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4367OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4368if (!(options & READ_CHAR_UPDATE_STR_PTR))4369OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4370OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4371OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4372OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4373JUMPHERE(jump2);4374if (options & READ_CHAR_UPDATE_STR_PTR)4375OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4376}4377else if (max >= 0x800)4378{4379add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));4380}4381else if (max < 128)4382{4383OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4384OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4385}4386else4387{4388OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4389if (!(options & READ_CHAR_UPDATE_STR_PTR))4390OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4391else4392OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4393OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4394OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 
6);4395OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4396OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4397if (options & READ_CHAR_UPDATE_STR_PTR)4398OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);4399}4400JUMPHERE(jump);4401}4402#elif PCRE2_CODE_UNIT_WIDTH == 164403if (common->utf)4404{4405if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;44064407if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))4408{4409OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4410jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);44114412if (options & READ_CHAR_UTF8_NEWLINE)4413add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));4414else4415add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));44164417if (backtracks != NULL)4418add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4419JUMPHERE(jump);4420return;4421}44224423if (max >= 0x10000)4424{4425OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4426jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);4427/* TMP2 contains the high surrogate. */4428OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4429OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);4430OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4431OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);4432OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4433JUMPHERE(jump);4434return;4435}44364437/* Skip low surrogate if necessary. 
*/4438OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);44394440if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)4441{4442if (options & READ_CHAR_UPDATE_STR_PTR)4443OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4444OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);4445if (options & READ_CHAR_UPDATE_STR_PTR)4446SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);4447if (max >= 0xd800)4448SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);4449}4450else4451{4452jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);4453if (options & READ_CHAR_UPDATE_STR_PTR)4454OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4455if (max >= 0xd800)4456OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);4457JUMPHERE(jump);4458}4459}4460#elif PCRE2_CODE_UNIT_WIDTH == 324461if (common->invalid_utf)4462{4463if (backtracks != NULL)4464{4465OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4466add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4467add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));4468}4469else4470{4471OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);4472OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);4473SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4474OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4475SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4476}4477}4478#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4479#endif /* SUPPORT_UNICODE */4480}44814482static void skip_valid_char(compiler_common *common)4483{4484DEFINE_COMPILER;4485#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)4486struct sljit_jump *jump;4487#endif44884489#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)4490if (common->utf)4491{4492OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);4493OP2(SLJIT_ADD, 
STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4494#if PCRE2_CODE_UNIT_WIDTH == 84495jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);4496OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);4497OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4498#elif PCRE2_CODE_UNIT_WIDTH == 164499jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);4500OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);4501OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);4502OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);4503OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);4504OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4505#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */4506JUMPHERE(jump);4507return;4508}4509#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */4510OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4511}45124513#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 845144515static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)4516{4517/* Tells whether the character codes below 128 are enough4518to determine a match. */4519const sljit_u8 value = nclass ? 0xff : 0;4520const sljit_u8 *end = bitset + 32;45214522bitset += 16;4523do4524{4525if (*bitset++ != value)4526return FALSE;4527}4528while (bitset < end);4529return TRUE;4530}45314532static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)4533{4534/* Reads the precise character type of a character into TMP1, if the character4535is less than 128. Otherwise it returns with zero. Does not check STR_END. The4536full_read argument tells whether characters above max are accepted or not. */4537DEFINE_COMPILER;4538struct sljit_jump *jump;45394540SLJIT_ASSERT(common->utf);45414542OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);4543OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));45444545/* All values > 127 are zero in ctypes. 
*/4546OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);45474548if (negated)4549{4550jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);45514552if (common->invalid_utf)4553{4554OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);4555add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4556add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));4557OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4558}4559else4560{4561OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);4562OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4563}4564JUMPHERE(jump);4565}4566}45674568#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */45694570static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)4571{4572/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */4573DEFINE_COMPILER;4574#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 84575struct sljit_jump *jump;4576#endif4577#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84578struct sljit_jump *jump2;4579#endif45804581SLJIT_UNUSED_ARG(backtracks);4582SLJIT_UNUSED_ARG(negated);45834584OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);4585OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));45864587#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 84588if (common->utf)4589{4590/* The result of this read may be unused, but saves an "else" part. 
*/4591OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4592jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);45934594if (!negated)4595{4596if (common->invalid_utf)4597add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));45984599OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4600OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4601OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);4602if (common->invalid_utf)4603add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));46044605OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4606OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);4607OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);4608if (common->invalid_utf)4609add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));46104611OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4612jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4613OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4614JUMPHERE(jump2);4615}4616else if (common->invalid_utf)4617{4618add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));4619OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);4620add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));46214622OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4623jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4624OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4625JUMPHERE(jump2);4626}4627else4628add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));46294630JUMPHERE(jump);4631return;4632}4633#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */46344635#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 324636if (common->invalid_utf && negated)4637add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));4638#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */46394640#if PCRE2_CODE_UNIT_WIDTH != 84641/* The ctypes array contains only 256 values. 
*/4642OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4643jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);4644#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */4645OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4646#if PCRE2_CODE_UNIT_WIDTH != 84647JUMPHERE(jump);4648#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */46494650#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 164651if (common->utf && negated)4652{4653/* Skip low surrogate if necessary. */4654if (!common->invalid_utf)4655{4656OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);46574658if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)4659{4660OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4661OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);4662SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);4663}4664else4665{4666jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);4667OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4668JUMPHERE(jump);4669}4670return;4671}46724673OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);4674jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);4675add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));4676add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));46774678OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4679OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4680OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);4681add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));46824683JUMPHERE(jump);4684return;4685}4686#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */4687}46884689static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)4690{4691/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,4692TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,4693and it is destroyed. 
Does not modify STR_PTR for invalid character sequences. */4694DEFINE_COMPILER;46954696#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 324697struct sljit_jump *jump;4698#endif46994700#ifdef SUPPORT_UNICODE4701#if PCRE2_CODE_UNIT_WIDTH == 84702struct sljit_label *label;47034704if (common->utf)4705{4706if (!must_be_valid && common->invalid_utf)4707{4708OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4709OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4710jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);4711add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));4712if (backtracks != NULL)4713add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));4714JUMPHERE(jump);4715return;4716}47174718label = LABEL();4719OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4720OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4721OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);4722CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);4723return;4724}4725#elif PCRE2_CODE_UNIT_WIDTH == 164726if (common->utf)4727{4728OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4729OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));47304731if (!must_be_valid && common->invalid_utf)4732{4733OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4734jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);4735add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));4736if (backtracks != NULL)4737add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));4738JUMPHERE(jump);4739return;4740}47414742/* Skip low surrogate if necessary. 
*/4743OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);4744OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);4745OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);4746OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);4747OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4748return;4749}4750#elif PCRE2_CODE_UNIT_WIDTH == 324751if (common->invalid_utf && !must_be_valid)4752{4753OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));4754if (backtracks != NULL)4755{4756add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));4757OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4758return;4759}47604761OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);4762OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);4763OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);4764OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);4765return;4766}4767#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */4768#endif /* SUPPORT_UNICODE */47694770SLJIT_UNUSED_ARG(backtracks);4771SLJIT_UNUSED_ARG(must_be_valid);47724773OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4774}47754776static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)4777{4778/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */4779DEFINE_COMPILER;4780struct sljit_jump *jump;47814782if (nltype == NLTYPE_ANY)4783{4784add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));4785sljit_set_current_flags(compiler, SLJIT_SET_Z);4786add_jump(compiler, backtracks, JUMP(jumpifmatch ? 
SLJIT_NOT_ZERO : SLJIT_ZERO));4787}4788else if (nltype == NLTYPE_ANYCRLF)4789{4790if (jumpifmatch)4791{4792add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));4793add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));4794}4795else4796{4797jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);4798add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));4799JUMPHERE(jump);4800}4801}4802else4803{4804SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);4805add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));4806}4807}48084809#ifdef SUPPORT_UNICODE48104811#if PCRE2_CODE_UNIT_WIDTH == 84812static void do_utfreadchar(compiler_common *common)4813{4814/* Fast decoding a UTF-8 character. TMP1 contains the first byte4815of the character (>= 0xc0). Return char value in TMP1. */4816DEFINE_COMPILER;4817struct sljit_jump *jump;48184819sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);4820OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4821OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4822OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4823OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);48244825/* Searching for the first zero. */4826OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);4827jump = JUMP(SLJIT_NOT_ZERO);4828/* Two byte sequence. */4829OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);4830OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4831OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48324833JUMPHERE(jump);4834OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));4835OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4836OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4837OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);48384839OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);4840jump = JUMP(SLJIT_NOT_ZERO);4841/* Three byte sequence. 
*/4842OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);4843OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4844OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48454846/* Four byte sequence. */4847JUMPHERE(jump);4848OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));4849OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);4850OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));4851OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4852OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);4853OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4854OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4855}48564857static void do_utfreadtype8(compiler_common *common)4858{4859/* Fast decoding a UTF-8 character type. TMP2 contains the first byte4860of the character (>= 0xc0). Return value in TMP1. */4861DEFINE_COMPILER;4862struct sljit_jump *jump;4863struct sljit_jump *compare;48644865sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);48664867OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);4868jump = JUMP(SLJIT_NOT_ZERO);4869/* Two byte sequence. */4870OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));4871OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));4872OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);4873/* The upper 5 bits are known at this point. */4874compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);4875OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);4876OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);4877OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);4878OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);4879OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48804881JUMPHERE(compare);4882OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4883OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);48844885/* We only have types for characters less than 256. 
*/4886JUMPHERE(jump);4887OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);4888OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);4889OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);4890OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);4891}48924893static void do_utfreadchar_invalid(compiler_common *common)4894{4895/* Slow decoding a UTF-8 character. TMP1 contains the first byte4896of the character (>= 0xc0). Return char value in TMP1. STR_PTR is4897undefined for invalid characters. */4898DEFINE_COMPILER;4899sljit_s32 i;4900sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);4901struct sljit_jump *jump;4902struct sljit_jump *buffer_end_close;4903struct sljit_label *three_byte_entry;4904struct sljit_label *exit_invalid_label;4905struct sljit_jump *exit_invalid[11];49064907sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);49084909OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);49104911/* Usually more than 3 characters remained in the subject buffer. */4912OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));49134914/* Not a valid start of a multi-byte sequence, no more bytes read. */4915exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);49164917buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);49184919OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));4920OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4921/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */4922OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);4923OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4924exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);49254926OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);4927jump = JUMP(SLJIT_NOT_ZERO);49284929OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));4930OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);49314932JUMPHERE(jump);49334934/* Three-byte sequence. 
*/4935OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));4936OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4937OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4938OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4939if (has_cmov)4940{4941OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);4942SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);4943exit_invalid[2] = NULL;4944}4945else4946exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);49474948OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);4949jump = JUMP(SLJIT_NOT_ZERO);49504951three_byte_entry = LABEL();49524953OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);4954if (has_cmov)4955{4956OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);4957SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);4958exit_invalid[3] = NULL;4959}4960else4961exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);4962OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);4963OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));49644965if (has_cmov)4966{4967OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);4968SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);4969exit_invalid[4] = NULL;4970}4971else4972exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);4973OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);49744975JUMPHERE(jump);49764977/* Four-byte sequence. 
*/4978OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));4979OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);4980OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);4981OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);4982if (has_cmov)4983{4984OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);4985SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);4986exit_invalid[5] = NULL;4987}4988else4989exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);49904991OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);4992if (has_cmov)4993{4994OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);4995SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);4996exit_invalid[6] = NULL;4997}4998else4999exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);50005001OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);5002OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50035004JUMPHERE(buffer_end_close);5005OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));5006exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);50075008/* Two-byte sequence. */5009OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5010OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5011/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */5012OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5013OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);5014exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);50155016OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);5017jump = JUMP(SLJIT_NOT_ZERO);50185019OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);50205021/* Three-byte sequence. 
*/5022JUMPHERE(jump);5023exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);50245025OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5026OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5027OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);5028OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);5029if (has_cmov)5030{5031OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5032SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);5033exit_invalid[10] = NULL;5034}5035else5036exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);50375038/* One will be substracted from STR_PTR later. */5039OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));50405041/* Four byte sequences are not possible. */5042CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);50435044exit_invalid_label = LABEL();5045for (i = 0; i < 11; i++)5046sljit_set_label(exit_invalid[i], exit_invalid_label);50475048OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5049OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5050}50515052static void do_utfreadnewline_invalid(compiler_common *common)5053{5054/* Slow decoding a UTF-8 character, specialized for newlines.5055TMP1 contains the first byte of the character (>= 0xc0). Return5056char value in TMP1. */5057DEFINE_COMPILER;5058struct sljit_label *loop;5059struct sljit_label *skip_start;5060struct sljit_label *three_byte_exit;5061struct sljit_jump *jump[5];50625063sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);50645065if (common->nltype != NLTYPE_ANY)5066{5067SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);50685069/* All newlines are ascii, just skip intermediate octets. 
*/5070jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5071loop = LABEL();5072if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)5073sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));5074else5075{5076OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5077OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5078}50795080OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);5081CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);5082OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));50835084JUMPHERE(jump[0]);50855086OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5087OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5088return;5089}50905091jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5092OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5093OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));50945095jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);5096jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);50975098skip_start = LABEL();5099OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);5100jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);51015102/* Skip intermediate octets. */5103loop = LABEL();5104jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5105OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5106OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5107OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);5108CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);51095110JUMPHERE(jump[3]);5111OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));51125113three_byte_exit = LABEL();5114JUMPHERE(jump[0]);5115JUMPHERE(jump[4]);51165117OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5118OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51195120/* Two byte long newline: 0x85. 
*/5121JUMPHERE(jump[1]);5122CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);51235124OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);5125OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51265127/* Three byte long newlines: 0x2028 and 0x2029. */5128JUMPHERE(jump[2]);5129CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);5130CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);51315132OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5133OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));51345135OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);5136CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);51375138OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);5139OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);5140OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5141}51425143static void do_utfmoveback_invalid(compiler_common *common)5144{5145/* Goes one character back. */5146DEFINE_COMPILER;5147sljit_s32 i;5148struct sljit_jump *jump;5149struct sljit_jump *buffer_start_close;5150struct sljit_label *exit_ok_label;5151struct sljit_label *exit_invalid_label;5152struct sljit_jump *exit_invalid[7];51535154sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);51555156OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));5157exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);51585159/* Two-byte sequence. */5160buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);51615162OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));51635164OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5165jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);51665167OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5168OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));5169OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51705171/* Three-byte sequence. 
*/5172JUMPHERE(jump);5173exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);51745175OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));51765177OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5178jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);51795180OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5181OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5182OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51835184/* Four-byte sequence. */5185JUMPHERE(jump);5186OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);5187exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);51885189OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5190OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);5191exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);51925193exit_ok_label = LABEL();5194OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5195OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);51965197/* Two-byte sequence. */5198JUMPHERE(buffer_start_close);5199OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));52005201exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);52025203OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));52045205OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5206CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);52075208/* Three-byte sequence. */5209OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5210exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);5211exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);52125213OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));52145215OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5216CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);52175218/* Four-byte sequences are not possible. 
*/52195220exit_invalid_label = LABEL();5221sljit_set_label(exit_invalid[5], exit_invalid_label);5222sljit_set_label(exit_invalid[6], exit_invalid_label);5223OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5224OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));5225OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);52265227JUMPHERE(exit_invalid[4]);5228/* -2 + 4 = 2 */5229OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));52305231exit_invalid_label = LABEL();5232for (i = 0; i < 4; i++)5233sljit_set_label(exit_invalid[i], exit_invalid_label);5234OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5235OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));5236OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5237}52385239static void do_utfpeakcharback(compiler_common *common)5240{5241/* Peak a character back. Does not modify STR_PTR. */5242DEFINE_COMPILER;5243struct sljit_jump *jump[2];52445245sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);52465247OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5248OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);5249jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);52505251OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5252OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);5253jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);52545255OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));5256OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);5257OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);5258OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5259OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);52605261JUMPHERE(jump[1]);5262OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5263OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5264OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);5265OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);52665267JUMPHERE(jump[0]);5268OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5269OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);5270OP2(SLJIT_SUB, TMP2, 
0, TMP2, 0, SLJIT_IMM, 0x80);5271OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);52725273OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5274}52755276static void do_utfpeakcharback_invalid(compiler_common *common)5277{5278/* Peak a character back. Does not modify STR_PTR. */5279DEFINE_COMPILER;5280sljit_s32 i;5281sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);5282struct sljit_jump *jump[2];5283struct sljit_label *two_byte_entry;5284struct sljit_label *three_byte_entry;5285struct sljit_label *exit_invalid_label;5286struct sljit_jump *exit_invalid[8];52875288sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);52895290OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));5291exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);5292jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);52935294/* Two-byte sequence. */5295OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5296OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5297jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);52985299two_byte_entry = LABEL();5300OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5301/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */5302OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5303OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53045305JUMPHERE(jump[1]);5306OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);5307OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);5308exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5309OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5310OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);53115312/* Three-byte sequence. 
*/5313OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5314OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);5315jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);53165317three_byte_entry = LABEL();5318OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);5319OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);53205321OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5322if (has_cmov)5323{5324OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);5325SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);5326exit_invalid[2] = NULL;5327}5328else5329exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);53305331OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5332if (has_cmov)5333{5334OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);5335SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);5336exit_invalid[3] = NULL;5337}5338else5339exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);53405341OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53425343JUMPHERE(jump[1]);5344OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);5345exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5346OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);5347OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);53485349/* Four-byte sequence. */5350OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));5351OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);5352OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);5353OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);5354/* ADD is used instead of OR because of the SUB 0x10000 above. 
*/5355OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);53565357if (has_cmov)5358{5359OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);5360SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);5361exit_invalid[5] = NULL;5362}5363else5364exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);53655366OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);5367OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53685369JUMPHERE(jump[0]);5370OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));5371jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);53725373/* Two-byte sequence. */5374OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5375OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5376CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);53775378OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);5379OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);5380exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);5381OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);5382OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);53835384/* Three-byte sequence. */5385OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));5386OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);5387CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);53885389OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5390OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);53915392JUMPHERE(jump[0]);5393exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);53945395/* Two-byte sequence. 
*/5396OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5397OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);5398CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);53995400exit_invalid_label = LABEL();5401for (i = 0; i < 8; i++)5402sljit_set_label(exit_invalid[i], exit_invalid_label);54035404OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5405OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5406}54075408#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */54095410#if PCRE2_CODE_UNIT_WIDTH == 1654115412static void do_utfreadchar_invalid(compiler_common *common)5413{5414/* Slow decoding a UTF-16 character. TMP1 contains the first half5415of the character (>= 0xd800). Return char value in TMP1. STR_PTR is5416undefined for invalid characters. */5417DEFINE_COMPILER;5418struct sljit_jump *exit_invalid[3];54195420sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);54215422/* TMP2 contains the high surrogate. */5423exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);5424exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);54255426OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5427OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);5428OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));54295430OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);5431OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);5432exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);54335434OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);5435OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);54365437JUMPHERE(exit_invalid[0]);5438JUMPHERE(exit_invalid[1]);5439JUMPHERE(exit_invalid[2]);5440OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5441OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5442}54435444static void do_utfreadnewline_invalid(compiler_common *common)5445{5446/* Slow decoding a UTF-16 character, specialized for newlines.5447TMP1 contains the first half of the character (>= 0xd800). Return5448char value in TMP1. 
*/54495450DEFINE_COMPILER;5451struct sljit_jump *exit_invalid[2];54525453sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);54545455/* TMP2 contains the high surrogate. */5456exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);54575458OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5459exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);54605461OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);5462OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);5463OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);5464OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);5465OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);5466OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);54675468OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);54695470JUMPHERE(exit_invalid[0]);5471JUMPHERE(exit_invalid[1]);5472OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5473OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5474}54755476static void do_utfmoveback_invalid(compiler_common *common)5477{5478/* Goes one character back. 
*/5479DEFINE_COMPILER;5480struct sljit_jump *exit_invalid[3];54815482sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);54835484exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);5485exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);54865487OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5488OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);5489exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);54905491OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5492OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);5493OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);54945495JUMPHERE(exit_invalid[0]);5496JUMPHERE(exit_invalid[1]);5497JUMPHERE(exit_invalid[2]);54985499OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5500OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);5501OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5502}55035504static void do_utfpeakcharback_invalid(compiler_common *common)5505{5506/* Peak a character back. Does not modify STR_PTR. 
*/5507DEFINE_COMPILER;5508struct sljit_jump *jump;5509struct sljit_jump *exit_invalid[3];55105511sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);55125513jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);5514OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));5515exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);5516exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);55175518OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));5519OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);5520OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);5521exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);5522OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);5523OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);55245525JUMPHERE(jump);5526OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);55275528JUMPHERE(exit_invalid[0]);5529JUMPHERE(exit_invalid[1]);5530JUMPHERE(exit_invalid[2]);55315532OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);5533OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5534}55355536#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */55375538/* UCD_BLOCK_SIZE must be 128 (see the assert below). */5539#define UCD_BLOCK_MASK 1275540#define UCD_BLOCK_SHIFT 755415542static void do_getucd(compiler_common *common)5543{5544/* Search the UCD record for the character comes in TMP1.5545Returns chartype in TMP1 and UCD offset in TMP2. 
*/5546DEFINE_COMPILER;5547#if PCRE2_CODE_UNIT_WIDTH == 325548struct sljit_jump *jump;5549#endif55505551#if defined SLJIT_DEBUG && SLJIT_DEBUG5552/* dummy_ucd_record */5553const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);5554SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);5555SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);5556#endif55575558SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);55595560sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);55615562#if PCRE2_CODE_UNIT_WIDTH == 325563if (!common->utf)5564{5565jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);5566OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);5567JUMPHERE(jump);5568}5569#endif55705571OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5572OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);5573OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));5574OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);5575sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP1, 0, TMP1, 0, TMP2, 0, UCD_BLOCK_SHIFT);5576OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));5577OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);5578OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5579}55805581static void do_getucdtype(compiler_common *common)5582{5583/* Search the UCD record for the character comes in TMP1.5584Returns chartype in TMP1 and UCD offset in TMP2. 
*/5585DEFINE_COMPILER;5586#if PCRE2_CODE_UNIT_WIDTH == 325587struct sljit_jump *jump;5588#endif55895590#if defined SLJIT_DEBUG && SLJIT_DEBUG5591/* dummy_ucd_record */5592const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);5593SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);5594SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);5595#endif55965597SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);55985599sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);56005601#if PCRE2_CODE_UNIT_WIDTH == 325602if (!common->utf)5603{5604jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);5605OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);5606JUMPHERE(jump);5607}5608#endif56095610OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);5611OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);5612OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));5613OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);5614sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP1, 0, TMP1, 0, TMP2, 0, UCD_BLOCK_SHIFT);5615OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));5616OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);56175618/* TMP2 is multiplied by 12. Same as (TMP2 + (TMP2 << 1)) << 2. 
*/5619OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));5620sljit_emit_op2_shift(compiler, SLJIT_ADD | SLJIT_SHL_IMM | SLJIT_SRC2_UNDEFINED, TMP2, 0, TMP2, 0, TMP2, 0, 1);5621OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 2);56225623OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);5624}56255626#endif /* SUPPORT_UNICODE */56275628static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)5629{5630DEFINE_COMPILER;5631struct sljit_label *mainloop;5632struct sljit_label *newlinelabel = NULL;5633struct sljit_jump *start;5634struct sljit_jump *end = NULL;5635struct sljit_jump *end2 = NULL;5636#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 325637struct sljit_label *loop;5638struct sljit_jump *jump;5639#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */5640jump_list *newline = NULL;5641sljit_u32 overall_options = common->re->overall_options;5642BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;5643BOOL newlinecheck = FALSE;5644BOOL readuchar = FALSE;56455646if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)5647&& (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))5648newlinecheck = TRUE;56495650SLJIT_ASSERT(common->abort_label == NULL);56515652if ((overall_options & PCRE2_FIRSTLINE) != 0)5653{5654/* Search for the end of the first line. 
*/5655SLJIT_ASSERT(common->match_end_ptr != 0);5656OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);56575658if (common->nltype == NLTYPE_FIXED && common->newline > 255)5659{5660mainloop = LABEL();5661OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5662end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5663OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));5664OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));5665CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);5666CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);5667JUMPHERE(end);5668OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5669}5670else5671{5672end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5673mainloop = LABEL();5674/* Continual stores does not cause data dependency. */5675OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);5676read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);5677check_newlinechar(common, common->nltype, &newline, TRUE);5678CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);5679JUMPHERE(end);5680OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);5681set_jumps(newline, LABEL());5682}56835684OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);5685}5686else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)5687{5688/* Check whether offset limit is set and valid. 
*/5689SLJIT_ASSERT(common->match_end_ptr != 0);56905691if (HAS_VIRTUAL_REGISTERS)5692{5693OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);5694OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));5695}5696else5697OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));56985699OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);5700end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);5701if (HAS_VIRTUAL_REGISTERS)5702OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);5703else5704OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));57055706#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 325707OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);5708#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */5709if (HAS_VIRTUAL_REGISTERS)5710OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));57115712OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);5713end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);5714OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);5715JUMPHERE(end2);5716OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);5717add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));5718JUMPHERE(end);5719OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);5720}57215722start = JUMP(SLJIT_JUMP);57235724if (newlinecheck)5725{5726newlinelabel = LABEL();5727OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5728end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5729OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);5730OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);5731OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);5732#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 325733OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);5734#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */5735OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);5736end2 = JUMP(SLJIT_JUMP);5737}57385739mainloop = 
LABEL();57405741/* Increasing the STR_PTR here requires one less jump in the most common case. */5742#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 325743if (common->utf && !common->invalid_utf) readuchar = TRUE;5744#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */5745if (newlinecheck) readuchar = TRUE;57465747if (readuchar)5748OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);57495750if (newlinecheck)5751CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);57525753OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5754#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 325755#if PCRE2_CODE_UNIT_WIDTH == 85756if (common->invalid_utf)5757{5758/* Skip continuation code units. */5759loop = LABEL();5760jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5761OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);5762OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5763OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);5764CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);5765OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5766JUMPHERE(jump);5767}5768else if (common->utf)5769{5770jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);5771OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);5772OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);5773JUMPHERE(jump);5774}5775#elif PCRE2_CODE_UNIT_WIDTH == 165776if (common->invalid_utf)5777{5778/* Skip continuation code units. 
*/5779loop = LABEL();5780jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);5781OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);5782OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5783OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);5784CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);5785OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5786JUMPHERE(jump);5787}5788else if (common->utf)5789{5790OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);57915792if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))5793{5794OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));5795OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);5796SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);5797}5798else5799{5800OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);5801OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);5802OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);5803OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);5804}5805}5806#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */5807#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */5808JUMPHERE(start);58095810if (newlinecheck)5811{5812JUMPHERE(end);5813JUMPHERE(end2);5814}58155816return mainloop;5817}581858195820static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)5821{5822sljit_u32 i, count = chars->count;58235824if (count == 255)5825return;58265827if (count == 0)5828{5829chars->count = 1;5830chars->chars[0] = chr;58315832if (last)5833chars->last_count = 1;5834return;5835}58365837for (i = 0; i < count; i++)5838if (chars->chars[i] == chr)5839return;58405841if (count >= MAX_DIFF_CHARS)5842{5843chars->count = 255;5844return;5845}58465847chars->chars[count] = chr;5848chars->count = count + 1;58495850if (last)5851chars->last_count++;5852}58535854/* Value can be increased if needed. Patterns5855such as /(a|){33}b/ can exhaust the stack.58565857Note: /(a|){29}b/ already stops scan_prefix()5858because it reaches the maximum step_count. 
*/5859#define SCAN_PREFIX_STACK_END 3258605861/*5862Scan prefix stores the prefix string in the chars array.5863The elements of the chars array is either small character5864sets or "any" (count is set to 255).58655866Examples (the chars array is represented by a simple regex):58675868/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)5869/a[a-z]b+c/ prefix: a.b (length: 3)5870/ab?cd/ prefix: a[bc][cd] (length: 3)5871/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)58725873The length is returned by scan_prefix(). The length is5874less than or equal than the minimum length of the pattern.5875*/58765877static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)5878{5879fast_forward_char_data *chars_start = chars;5880fast_forward_char_data *chars_end = chars + MAX_N_CHARS;5881PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];5882fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];5883sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];5884BOOL last, any, class, caseless;5885int stack_ptr, step_count, repeat, len, len_save;5886sljit_u32 chr; /* Any unicode character. */5887sljit_u8 *bytes, *bytes_end, byte;5888PCRE2_SPTR alternative, cc_save, oc;5889#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 85890PCRE2_UCHAR othercase[4];5891#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 165892PCRE2_UCHAR othercase[2];5893#else5894PCRE2_UCHAR othercase[1];5895#endif58965897repeat = 1;5898stack_ptr = 0;5899step_count = 10000;5900while (TRUE)5901{5902if (--step_count == 0)5903return 0;59045905SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);59065907if (chars >= chars_end)5908{5909if (stack_ptr == 0)5910return (int)(chars_end - chars_start);59115912--stack_ptr;5913cc = cc_stack[stack_ptr];5914chars = chars_stack[stack_ptr];59155916if (chars >= chars_end)5917continue;59185919if (next_alternative_stack[stack_ptr] != 0)5920{5921/* When an alternative is processed, the5922next alternative is pushed onto the stack. 
*/5923SLJIT_ASSERT(*cc == OP_ALT);5924alternative = cc + GET(cc, 1);5925if (*alternative == OP_ALT)5926{5927SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);5928SLJIT_ASSERT(chars_stack[stack_ptr] == chars);5929SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);5930cc_stack[stack_ptr] = alternative;5931stack_ptr++;5932}5933cc += 1 + LINK_SIZE;5934}5935}59365937last = TRUE;5938any = FALSE;5939class = FALSE;5940caseless = FALSE;59415942switch (*cc)5943{5944case OP_CHARI:5945caseless = TRUE;5946PCRE2_FALLTHROUGH /* Fall through */5947case OP_CHAR:5948last = FALSE;5949cc++;5950break;59515952case OP_SOD:5953case OP_SOM:5954case OP_SET_SOM:5955case OP_NOT_WORD_BOUNDARY:5956case OP_WORD_BOUNDARY:5957case OP_EODN:5958case OP_EOD:5959case OP_CIRC:5960case OP_CIRCM:5961case OP_DOLL:5962case OP_DOLLM:5963case OP_NOT_UCP_WORD_BOUNDARY:5964case OP_UCP_WORD_BOUNDARY:5965/* Zero width assertions. */5966cc++;5967continue;59685969case OP_ASSERT:5970case OP_ASSERT_NOT:5971case OP_ASSERTBACK:5972case OP_ASSERTBACK_NOT:5973case OP_ASSERT_NA:5974case OP_ASSERTBACK_NA:5975case OP_ASSERT_SCS:5976cc = bracketend(cc);5977continue;59785979case OP_PLUSI:5980case OP_MINPLUSI:5981case OP_POSPLUSI:5982caseless = TRUE;5983PCRE2_FALLTHROUGH /* Fall through */5984case OP_PLUS:5985case OP_MINPLUS:5986case OP_POSPLUS:5987cc++;5988break;59895990case OP_EXACTI:5991caseless = TRUE;5992PCRE2_FALLTHROUGH /* Fall through */5993case OP_EXACT:5994repeat = GET2(cc, 1);5995last = FALSE;5996cc += 1 + IMM2_SIZE;5997break;59985999case OP_QUERYI:6000case OP_MINQUERYI:6001case OP_POSQUERYI:6002caseless = TRUE;6003PCRE2_FALLTHROUGH /* Fall through */6004case OP_QUERY:6005case OP_MINQUERY:6006case OP_POSQUERY:6007len = 1;6008cc++;6009#ifdef SUPPORT_UNICODE6010if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);6011#endif6012if (stack_ptr >= SCAN_PREFIX_STACK_END)6013{6014chars_end = chars;6015continue;6016}60176018cc_stack[stack_ptr] = cc + len;6019chars_stack[stack_ptr] = 
chars;6020next_alternative_stack[stack_ptr] = 0;6021stack_ptr++;60226023last = FALSE;6024break;60256026case OP_KET:6027cc += 1 + LINK_SIZE;6028continue;60296030case OP_ALT:6031cc += GET(cc, 1);6032continue;60336034case OP_ONCE:6035case OP_BRA:6036case OP_BRAPOS:6037case OP_CBRA:6038case OP_CBRAPOS:6039alternative = cc + GET(cc, 1);6040if (*alternative == OP_ALT)6041{6042if (stack_ptr >= SCAN_PREFIX_STACK_END)6043{6044chars_end = chars;6045continue;6046}60476048cc_stack[stack_ptr] = alternative;6049chars_stack[stack_ptr] = chars;6050next_alternative_stack[stack_ptr] = 1;6051stack_ptr++;6052}60536054if (*cc == OP_CBRA || *cc == OP_CBRAPOS)6055cc += IMM2_SIZE;6056cc += 1 + LINK_SIZE;6057continue;60586059case OP_CLASS:6060#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 86061if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))6062{6063chars_end = chars;6064continue;6065}6066#endif6067class = TRUE;6068break;60696070case OP_NCLASS:6071#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326072if (common->utf)6073{6074chars_end = chars;6075continue;6076}6077#endif6078class = TRUE;6079break;60806081#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 86082case OP_XCLASS:6083case OP_ECLASS:6084#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326085if (common->utf)6086{6087chars_end = chars;6088continue;6089}6090#endif6091any = TRUE;6092cc += GET(cc, 1);6093break;6094#endif60956096case OP_DIGIT:6097#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 86098if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))6099{6100chars_end = chars;6101continue;6102}6103#endif6104any = TRUE;6105cc++;6106break;61076108case OP_WHITESPACE:6109#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 86110if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))6111{6112chars_end = chars;6113continue;6114}6115#endif6116any = 
TRUE;6117cc++;6118break;61196120case OP_WORDCHAR:6121#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 86122if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))6123{6124chars_end = chars;6125continue;6126}6127#endif6128any = TRUE;6129cc++;6130break;61316132case OP_NOT:6133case OP_NOTI:6134cc++;6135PCRE2_FALLTHROUGH /* Fall through */6136case OP_NOT_DIGIT:6137case OP_NOT_WHITESPACE:6138case OP_NOT_WORDCHAR:6139case OP_ANY:6140case OP_ALLANY:6141#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326142if (common->utf)6143{6144chars_end = chars;6145continue;6146}6147#endif6148any = TRUE;6149cc++;6150break;61516152#ifdef SUPPORT_UNICODE6153case OP_NOTPROP:6154case OP_PROP:6155#if PCRE2_CODE_UNIT_WIDTH != 326156if (common->utf)6157{6158chars_end = chars;6159continue;6160}6161#endif6162any = TRUE;6163cc += 1 + 2;6164break;6165#endif61666167case OP_TYPEEXACT:6168repeat = GET2(cc, 1);6169cc += 1 + IMM2_SIZE;6170continue;61716172case OP_NOTEXACT:6173case OP_NOTEXACTI:6174#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326175if (common->utf)6176{6177chars_end = chars;6178continue;6179}6180#endif6181any = TRUE;6182repeat = GET2(cc, 1);6183cc += 1 + IMM2_SIZE + 1;6184break;61856186default:6187chars_end = chars;6188continue;6189}61906191SLJIT_ASSERT(chars < chars_end);61926193if (any)6194{6195do6196{6197chars->count = 255;6198chars++;6199}6200while (--repeat > 0 && chars < chars_end);62016202repeat = 1;6203continue;6204}62056206if (class)6207{6208bytes = (sljit_u8*) (cc + 1);6209cc += 1 + 32 / sizeof(PCRE2_UCHAR);62106211SLJIT_ASSERT(last == TRUE && repeat == 1);6212switch (*cc)6213{6214case OP_CRQUERY:6215case OP_CRMINQUERY:6216case OP_CRPOSQUERY:6217last = FALSE;6218PCRE2_FALLTHROUGH /* Fall through */6219case OP_CRSTAR:6220case OP_CRMINSTAR:6221case OP_CRPOSSTAR:6222if (stack_ptr >= SCAN_PREFIX_STACK_END)6223{6224chars_end = chars;6225continue;6226}62276228cc_stack[stack_ptr] = 
++cc;6229chars_stack[stack_ptr] = chars;6230next_alternative_stack[stack_ptr] = 0;6231stack_ptr++;6232break;62336234default:6235case OP_CRPLUS:6236case OP_CRMINPLUS:6237case OP_CRPOSPLUS:6238break;62396240case OP_CRRANGE:6241case OP_CRMINRANGE:6242case OP_CRPOSRANGE:6243repeat = GET2(cc, 1);6244if (repeat <= 0)6245{6246chars_end = chars;6247continue;6248}62496250last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));6251cc += 1 + 2 * IMM2_SIZE;6252break;6253}62546255do6256{6257if (bytes[31] & 0x80)6258chars->count = 255;6259else if (chars->count != 255)6260{6261bytes_end = bytes + 32;6262chr = 0;6263do6264{6265byte = *bytes++;6266SLJIT_ASSERT((chr & 0x7) == 0);6267if (byte == 0)6268chr += 8;6269else6270{6271do6272{6273if ((byte & 0x1) != 0)6274add_prefix_char(chr, chars, TRUE);6275byte >>= 1;6276chr++;6277}6278while (byte != 0);6279chr = (chr + 7) & (sljit_u32)(~7);6280}6281}6282while (chars->count != 255 && bytes < bytes_end);6283bytes = bytes_end - 32;6284}62856286chars++;6287}6288while (--repeat > 0 && chars < chars_end);62896290repeat = 1;6291if (last)6292chars_end = chars;6293continue;6294}62956296len = 1;6297#ifdef SUPPORT_UNICODE6298if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);6299#endif63006301if (caseless && char_has_othercase(common, cc))6302{6303#ifdef SUPPORT_UNICODE6304if (common->utf)6305{6306GETCHAR(chr, cc);6307if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)6308{6309chars_end = chars;6310continue;6311}6312}6313else6314#endif6315{6316chr = *cc;6317#ifdef SUPPORT_UNICODE6318if (common->ucp && chr > 127)6319{6320chr = UCD_OTHERCASE(chr);6321othercase[0] = (chr == (PCRE2_UCHAR)chr) ? 
chr : *cc;6322}6323else6324#endif6325othercase[0] = TABLE_GET(chr, common->fcc, chr);6326}6327}6328else6329{6330caseless = FALSE;6331othercase[0] = 0; /* Stops compiler warning - PH */6332}63336334len_save = len;6335cc_save = cc;6336while (TRUE)6337{6338oc = othercase;6339do6340{6341len--;63426343chr = *cc;6344add_prefix_char(*cc, chars, len == 0);63456346if (caseless)6347add_prefix_char(*oc, chars, len == 0);63486349chars++;6350cc++;6351oc++;6352}6353while (len > 0 && chars < chars_end);63546355if (--repeat == 0 || chars >= chars_end)6356break;63576358len = len_save;6359cc = cc_save;6360}63616362repeat = 1;6363if (last)6364chars_end = chars;6365}6366}63676368#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326369static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)6370{6371#if PCRE2_CODE_UNIT_WIDTH == 86372OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);6373CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);6374#elif PCRE2_CODE_UNIT_WIDTH == 166375OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);6376CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);6377#else6378#error "Unknown code width"6379#endif6380}6381#endif63826383#include "pcre2_jit_simd_inc.h"63846385#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD63866387static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)6388{6389sljit_s32 i, j, max_i = 0, max_j = 0;6390sljit_u32 max_pri = 0;6391sljit_s32 max_offset = max_fast_forward_char_pair_offset();6392PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;63936394for (i = max - 1; i >= 1; i--)6395{6396if (chars[i].last_count > 2)6397{6398a1 = chars[i].chars[0];6399a2 = chars[i].chars[1];6400a_pri = chars[i].last_count;64016402j = i - max_offset;6403if (j < 0)6404j = 0;64056406while (j < i)6407{6408b_pri = chars[j].last_count;6409if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)6410{6411b1 = chars[j].chars[0];6412b2 = 
chars[j].chars[1];64136414if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)6415{6416max_pri = a_pri + b_pri;6417max_i = i;6418max_j = j;6419}6420}6421j++;6422}6423}6424}64256426if (max_pri == 0)6427return FALSE;64286429fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);6430return TRUE;6431}64326433#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */64346435static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)6436{6437DEFINE_COMPILER;6438struct sljit_label *start;6439struct sljit_jump *match;6440struct sljit_jump *partial_quit;6441PCRE2_UCHAR mask;6442BOOL has_match_end = (common->match_end_ptr != 0);64436444SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);64456446if (has_match_end)6447OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);64486449if (offset > 0)6450OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));64516452if (has_match_end)6453{6454OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);64556456OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));6457OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);6458SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);6459}64606461#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD64626463if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)6464{6465fast_forward_char_simd(common, char1, char2, offset);64666467if (offset > 0)6468OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));64696470if (has_match_end)6471OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6472return;6473}64746475#endif64766477start = LABEL();64786479partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6480if (common->mode == PCRE2_JIT_COMPLETE)6481add_jump(compiler, &common->failed_match, partial_quit);64826483OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6484OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));64856486if (char1 == 
char2)6487CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);6488else6489{6490mask = char1 ^ char2;6491if (is_powerof2(mask))6492{6493OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);6494CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);6495}6496else6497{6498match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);6499CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);6500JUMPHERE(match);6501}6502}65036504#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326505if (common->utf && offset > 0)6506{6507OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));6508jumpto_if_not_utf_char_start(compiler, TMP1, start);6509}6510#endif65116512OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));65136514if (common->mode != PCRE2_JIT_COMPLETE)6515JUMPHERE(partial_quit);65166517if (has_match_end)6518OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6519}65206521static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)6522{6523DEFINE_COMPILER;6524struct sljit_label *start;6525struct sljit_jump *match;6526fast_forward_char_data chars[MAX_N_CHARS];6527sljit_s32 offset;6528PCRE2_UCHAR mask;6529PCRE2_UCHAR *char_set, *char_set_end;6530int i, max, from;6531int range_right = -1, range_len;6532sljit_u8 *update_table = NULL;6533BOOL in_range;65346535for (i = 0; i < MAX_N_CHARS; i++)6536{6537chars[i].count = 0;6538chars[i].last_count = 0;6539}65406541max = scan_prefix(common, common->start, chars);65426543if (max < 1)6544return FALSE;65456546/* Convert last_count to priority. */6547for (i = 0; i < max; i++)6548{6549SLJIT_ASSERT(chars[i].last_count <= chars[i].count);65506551switch (chars[i].count)6552{6553case 0:6554chars[i].count = 255;6555chars[i].last_count = 0;6556break;65576558case 1:6559chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;6560/* Simplifies algorithms later. 
*/6561chars[i].chars[1] = chars[i].chars[0];6562break;65636564case 2:6565SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);65666567if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))6568chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;6569else6570chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;6571break;65726573default:6574chars[i].last_count = (chars[i].count == 255) ? 0 : 1;6575break;6576}6577}65786579#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD6580if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))6581return TRUE;6582#endif65836584in_range = FALSE;6585/* Prevent compiler "uninitialized" warning */6586from = 0;6587range_len = 4 /* minimum length */ - 1;6588for (i = 0; i <= max; i++)6589{6590if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))6591{6592range_len = i - from;6593range_right = i - 1;6594}65956596if (i < max && chars[i].count < 255)6597{6598SLJIT_ASSERT(chars[i].count > 0);6599if (!in_range)6600{6601in_range = TRUE;6602from = i;6603}6604}6605else6606in_range = FALSE;6607}66086609if (range_right >= 0)6610{6611update_table = (sljit_u8 *)allocate_read_only_data(common, 256);6612if (update_table == NULL)6613return TRUE;6614memset(update_table, IN_UCHARS(range_len), 256);66156616for (i = 0; i < range_len; i++)6617{6618SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);66196620char_set = chars[range_right - i].chars;6621char_set_end = char_set + chars[range_right - i].count;6622do6623{6624if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))6625update_table[(*char_set) & 0xff] = IN_UCHARS(i);6626char_set++;6627}6628while (char_set < char_set_end);6629}6630}66316632offset = -1;6633/* Scan forward. 
*/6634for (i = 0; i < max; i++)6635{6636if (range_right == i)6637continue;66386639if (offset == -1)6640{6641if (chars[i].last_count >= 2)6642offset = i;6643}6644else if (chars[offset].last_count < chars[i].last_count)6645offset = i;6646}66476648SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));66496650if (range_right < 0)6651{6652if (offset < 0)6653return FALSE;6654/* Works regardless the value is 1 or 2. */6655fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);6656return TRUE;6657}66586659SLJIT_ASSERT(range_right != offset);66606661if (common->match_end_ptr != 0)6662{6663OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6664OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);6665OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6666add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));6667OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);6668SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);6669}6670else6671{6672OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6673add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));6674}66756676SLJIT_ASSERT(range_right >= 0);66776678if (!HAS_VIRTUAL_REGISTERS)6679OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);66806681start = LABEL();6682add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));66836684#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)6685OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));6686#else6687OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);6688#endif66896690if (!HAS_VIRTUAL_REGISTERS)6691OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);6692else6693OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);66946695OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);6696CMPTO(SLJIT_NOT_EQUAL, 
TMP1, 0, SLJIT_IMM, 0, start);66976698if (offset >= 0)6699{6700OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));6701OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));67026703if (chars[offset].count == 1)6704CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);6705else6706{6707mask = chars[offset].chars[0] ^ chars[offset].chars[1];6708if (is_powerof2(mask))6709{6710OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);6711CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);6712}6713else6714{6715match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);6716CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);6717JUMPHERE(match);6718}6719}6720}67216722#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 326723if (common->utf && offset != 0)6724{6725if (offset < 0)6726{6727OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6728OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6729}6730else6731OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));67326733jumpto_if_not_utf_char_start(compiler, TMP1, start);67346735if (offset < 0)6736OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6737}6738#endif67396740if (offset >= 0)6741OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));67426743if (common->match_end_ptr != 0)6744OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6745else6746OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));6747return TRUE;6748}67496750static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)6751{6752PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);6753PCRE2_UCHAR oc;67546755oc = first_char;6756if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)6757{6758oc = TABLE_GET(first_char, common->fcc, first_char);6759#if defined SUPPORT_UNICODE6760if (first_char > 127 && (common->utf || common->ucp))6761oc = 
UCD_OTHERCASE(first_char);6762#endif6763}67646765fast_forward_first_char2(common, first_char, oc, 0);6766}67676768static SLJIT_INLINE void fast_forward_newline(compiler_common *common)6769{6770DEFINE_COMPILER;6771struct sljit_label *loop;6772struct sljit_jump *lastchar = NULL;6773struct sljit_jump *firstchar;6774struct sljit_jump *quit = NULL;6775struct sljit_jump *foundcr = NULL;6776struct sljit_jump *notfoundnl;6777jump_list *newline = NULL;67786779if (common->match_end_ptr != 0)6780{6781OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);6782OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6783}67846785if (common->nltype == NLTYPE_FIXED && common->newline > 255)6786{6787#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD6788if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)6789{6790if (HAS_VIRTUAL_REGISTERS)6791{6792OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6793OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6794OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));6795}6796else6797{6798OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));6799OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));6800}6801firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);68026803OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6804OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);6805OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);6806#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326807OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);6808#endif6809OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);68106811fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);6812OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));6813}6814else6815#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */6816{6817lastchar = 
CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6818if (HAS_VIRTUAL_REGISTERS)6819{6820OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6821OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6822OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));6823}6824else6825{6826OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));6827OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));6828}6829firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);68306831OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));6832OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);6833OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);6834#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326835OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);6836#endif6837OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);68386839loop = LABEL();6840OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6841quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6842OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));6843OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));6844CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);6845CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);68466847JUMPHERE(quit);6848JUMPHERE(lastchar);6849}68506851JUMPHERE(firstchar);68526853if (common->match_end_ptr != 0)6854OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6855return;6856}68576858if (HAS_VIRTUAL_REGISTERS)6859{6860OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);6861OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));6862}6863else6864OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));68656866/* Example: match /^/ to \r\n from offset 1. 
*/6867firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);68686869if (common->nltype == NLTYPE_ANY)6870move_back(common, NULL, FALSE);6871else6872OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));68736874loop = LABEL();6875common->ff_newline_shortcut = loop;68766877#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD6878if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))6879{6880if (common->nltype == NLTYPE_ANYCRLF)6881{6882fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);6883if (common->mode != PCRE2_JIT_COMPLETE)6884lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);68856886OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6887OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6888quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);6889}6890else6891{6892fast_forward_char_simd(common, common->newline, common->newline, 0);68936894OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));6895if (common->mode != PCRE2_JIT_COMPLETE)6896{6897OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);6898SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);6899}6900}6901}6902else6903#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */6904{6905read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);6906lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6907if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)6908foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);6909check_newlinechar(common, common->nltype, &newline, FALSE);6910set_jumps(newline, loop);6911}69126913if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)6914{6915if (quit == NULL)6916{6917quit = JUMP(SLJIT_JUMP);6918JUMPHERE(foundcr);6919}69206921notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6922OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6923OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);6924OP_FLAGS(SLJIT_MOV, TMP1, 0, 
SLJIT_EQUAL);6925#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 326926OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);6927#endif6928OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);6929JUMPHERE(notfoundnl);6930JUMPHERE(quit);6931}69326933if (lastchar)6934JUMPHERE(lastchar);6935JUMPHERE(firstchar);69366937if (common->match_end_ptr != 0)6938OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);6939}69406941static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);69426943static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)6944{6945DEFINE_COMPILER;6946const sljit_u8 *start_bits = common->re->start_bitmap;6947struct sljit_label *start;6948struct sljit_jump *partial_quit;6949#if PCRE2_CODE_UNIT_WIDTH != 86950struct sljit_jump *found = NULL;6951#endif6952jump_list *matches = NULL;69536954if (common->match_end_ptr != 0)6955{6956OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);6957OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);6958OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));6959OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);6960SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);6961}69626963start = LABEL();69646965partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);6966if (common->mode == PCRE2_JIT_COMPLETE)6967add_jump(compiler, &common->failed_match, partial_quit);69686969OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);6970OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));69716972if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))6973{6974#if PCRE2_CODE_UNIT_WIDTH != 86975if ((start_bits[31] & 0x80) != 0)6976found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);6977else6978CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);6979#elif defined SUPPORT_UNICODE6980if (common->utf && is_char7_bitset(start_bits, FALSE))6981CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, 
start);6982#endif6983OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);6984OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);6985OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);6986if (!HAS_VIRTUAL_REGISTERS)6987{6988OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);6989OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);6990}6991else6992{6993OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);6994OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);6995}6996JUMPTO(SLJIT_ZERO, start);6997}6998else6999set_jumps(matches, start);70007001#if PCRE2_CODE_UNIT_WIDTH != 87002if (found != NULL)7003JUMPHERE(found);7004#endif70057006OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));70077008if (common->mode != PCRE2_JIT_COMPLETE)7009JUMPHERE(partial_quit);70107011if (common->match_end_ptr != 0)7012OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);7013}70147015static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)7016{7017DEFINE_COMPILER;7018struct sljit_label *loop;7019struct sljit_jump *toolong;7020struct sljit_jump *already_found;7021struct sljit_jump *found;7022struct sljit_jump *found_oc = NULL;7023jump_list *not_found = NULL;7024sljit_u32 oc, bit;70257026SLJIT_ASSERT(common->req_char_ptr != 0);7027OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);7028OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);7029toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);7030already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);70317032if (has_firstchar)7033OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7034else7035OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);70367037oc = req_char;7038if (caseless)7039{7040oc = TABLE_GET(req_char, common->fcc, req_char);7041#if defined SUPPORT_UNICODE7042if (req_char > 127 && (common->utf || common->ucp))7043oc = UCD_OTHERCASE(req_char);7044#endif7045}70467047#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD7048if 
(JIT_HAS_FAST_REQUESTED_CHAR_SIMD)7049{7050not_found = fast_requested_char_simd(common, req_char, oc);7051}7052else7053#endif7054{7055loop = LABEL();7056add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));70577058OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);70597060if (req_char == oc)7061found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);7062else7063{7064bit = req_char ^ oc;7065if (is_powerof2(bit))7066{7067OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);7068found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);7069}7070else7071{7072found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);7073found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);7074}7075}7076OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7077JUMPTO(SLJIT_JUMP, loop);70787079JUMPHERE(found);7080if (found_oc)7081JUMPHERE(found_oc);7082}70837084OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);70857086JUMPHERE(already_found);7087JUMPHERE(toolong);7088return not_found;7089}70907091static void do_revertframes(compiler_common *common)7092{7093DEFINE_COMPILER;7094struct sljit_jump *jump;7095struct sljit_label *mainloop;70967097sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);7098GET_LOCAL_BASE(TMP1, 0, 0);70997100/* Drop frames until we reach STACK_TOP. 
*/7101mainloop = LABEL();7102OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));7103OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);7104jump = JUMP(SLJIT_SIG_LESS_EQUAL);71057106OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);7107if (HAS_VIRTUAL_REGISTERS)7108{7109OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));7110OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));7111OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));7112}7113else7114{7115OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));7116OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));7117OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));7118OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);7119GET_LOCAL_BASE(TMP1, 0, 0);7120OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);7121}7122JUMPTO(SLJIT_JUMP, mainloop);71237124JUMPHERE(jump);7125sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);7126jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);7127/* End of reverting values. 
*/7128OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);71297130JUMPHERE(jump);7131OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, TMP2, 0);7132if (HAS_VIRTUAL_REGISTERS)7133{7134OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));7135OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));7136}7137else7138{7139OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));7140OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));7141OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);7142}7143JUMPTO(SLJIT_JUMP, mainloop);7144}71457146#ifdef SUPPORT_UNICODE7147#define UCPCAT(bit) (1 << (bit))7148#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))7149#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))7150#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))7151#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)7152#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)7153#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)7154#endif71557156static void check_wordboundary(compiler_common *common, BOOL ucp)7157{7158DEFINE_COMPILER;7159struct sljit_jump *skipread;7160jump_list *skipread_list = NULL;7161#ifdef SUPPORT_UNICODE7162struct sljit_label *valid_utf;7163jump_list *invalid_utf1 = NULL;7164#endif /* SUPPORT_UNICODE */7165jump_list *invalid_utf2 = NULL;7166#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE7167struct sljit_jump *jump;7168#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */71697170SLJIT_UNUSED_ARG(ucp);7171SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);71727173SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));7174sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7175/* Get type of the previous char, and put it to TMP3. 
*/7176OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7177OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));7178OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);7179skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);71807181#ifdef SUPPORT_UNICODE7182if (common->invalid_utf)7183{7184peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);71857186if (common->mode != PCRE2_JIT_COMPLETE)7187{7188OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);7189OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);7190move_back(common, NULL, TRUE);7191check_start_used_ptr(common);7192OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);7193OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);7194}7195}7196else7197#endif /* SUPPORT_UNICODE */7198{7199if (common->mode == PCRE2_JIT_COMPLETE)7200peek_char_back(common, READ_CHAR_MAX, NULL);7201else7202{7203move_back(common, NULL, TRUE);7204check_start_used_ptr(common);7205read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);7206}7207}72087209/* Testing char type. */7210#ifdef SUPPORT_UNICODE7211if (ucp)7212{7213add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));7214OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);7215OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);7216OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);7217}7218else7219#endif /* SUPPORT_UNICODE */7220{7221#if PCRE2_CODE_UNIT_WIDTH != 87222jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7223#elif defined SUPPORT_UNICODE7224/* Here TMP3 has already been zeroed. 
*/7225jump = NULL;7226if (common->utf)7227jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7228#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7229OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);7230OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);7231OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);7232#if PCRE2_CODE_UNIT_WIDTH != 87233JUMPHERE(jump);7234#elif defined SUPPORT_UNICODE7235if (jump != NULL)7236JUMPHERE(jump);7237#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7238}7239JUMPHERE(skipread);72407241OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7242check_str_end(common, &skipread_list);7243peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, &invalid_utf2);72447245/* Testing char type. This is a code duplication. */7246#ifdef SUPPORT_UNICODE72477248valid_utf = LABEL();72497250if (ucp)7251{7252add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));7253OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);7254OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);7255OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);7256}7257else7258#endif /* SUPPORT_UNICODE */7259{7260#if PCRE2_CODE_UNIT_WIDTH != 87261/* TMP2 may be destroyed by peek_char. 
*/7262OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7263jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7264#elif defined SUPPORT_UNICODE7265OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);7266jump = NULL;7267if (common->utf)7268jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);7269#endif7270OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);7271OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);7272OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);7273#if PCRE2_CODE_UNIT_WIDTH != 87274JUMPHERE(jump);7275#elif defined SUPPORT_UNICODE7276if (jump != NULL)7277JUMPHERE(jump);7278#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */7279}7280set_jumps(skipread_list, LABEL());72817282OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7283OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);7284OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);72857286#ifdef SUPPORT_UNICODE7287if (common->invalid_utf)7288{7289set_jumps(invalid_utf1, LABEL());72907291peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCAL1, NULL);7292CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);72937294OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7295OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);7296OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);72977298set_jumps(invalid_utf2, LABEL());7299OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7300OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);7301OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7302}7303#endif /* SUPPORT_UNICODE */7304}73057306static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7307{7308/* May destroy TMP1. */7309DEFINE_COMPILER;7310int ranges[MAX_CLASS_RANGE_SIZE];7311sljit_u8 bit, cbit, all;7312int i, byte, length = 0;73137314bit = bits[0] & 0x1;7315/* All bits will be zero or one (since bit is zero or one). 
*/7316all = (sljit_u8)-bit;73177318for (i = 0; i < 256; )7319{7320byte = i >> 3;7321if ((i & 0x7) == 0 && bits[byte] == all)7322i += 8;7323else7324{7325cbit = (bits[byte] >> (i & 0x7)) & 0x1;7326if (cbit != bit)7327{7328if (length >= MAX_CLASS_RANGE_SIZE)7329return FALSE;7330ranges[length] = i;7331length++;7332bit = cbit;7333all = (sljit_u8)-cbit; /* sign extend bit into byte */7334}7335i++;7336}7337}73387339if (((bit == 0) && nclass) || ((bit == 1) && !nclass))7340{7341if (length >= MAX_CLASS_RANGE_SIZE)7342return FALSE;7343ranges[length] = 256;7344length++;7345}73467347if (length < 0 || length > 4)7348return FALSE;73497350bit = bits[0] & 0x1;7351if (invert) bit ^= 0x1;73527353/* No character is accepted. */7354if (length == 0 && bit == 0)7355add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));73567357switch(length)7358{7359case 0:7360/* When bit != 0, all characters are accepted. */7361return TRUE;73627363case 1:7364add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7365return TRUE;73667367case 2:7368if (ranges[0] + 1 != ranges[1])7369{7370OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7371add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7372}7373else7374add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7375return TRUE;73767377case 3:7378if (bit != 0)7379{7380add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));7381if (ranges[0] + 1 != ranges[1])7382{7383OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7384add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7385}7386else7387add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));7388return TRUE;7389}73907391add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));7392if (ranges[1] + 1 != ranges[2])7393{7394OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);7395add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));7396}7397else7398add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));7399return TRUE;74007401case 4:7402if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])7403&& (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]7404&& (ranges[1] & (ranges[2] - ranges[0])) == 07405&& is_powerof2(ranges[2] - ranges[0]))7406{7407SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);7408OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);7409if (ranges[2] + 1 != ranges[3])7410{7411OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);7412add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));7413}7414else7415add_jump(compiler, backtracks, CMP(bit != 0 ? 
SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));7416return TRUE;7417}74187419if (bit != 0)7420{7421i = 0;7422if (ranges[0] + 1 != ranges[1])7423{7424OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7425add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7426i = ranges[0];7427}7428else7429add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));74307431if (ranges[2] + 1 != ranges[3])7432{7433OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);7434add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));7435}7436else7437add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));7438return TRUE;7439}74407441OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);7442add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));7443if (ranges[1] + 1 != ranges[2])7444{7445OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);7446add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));7447}7448else7449add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));7450return TRUE;74517452default:7453SLJIT_UNREACHABLE();7454return FALSE;7455}7456}74577458static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7459{7460/* May destroy TMP1. 
*/7461DEFINE_COMPILER;7462uint16_t char_list[MAX_CLASS_CHARS_SIZE];7463uint8_t byte;7464sljit_s32 type;7465int i, j, k, len, c;74667467if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))7468return FALSE;74697470len = 0;74717472for (i = 0; i < 32; i++)7473{7474byte = bits[i];74757476if (nclass)7477byte = (sljit_u8)~byte;74787479j = 0;7480while (byte != 0)7481{7482if (byte & 0x1)7483{7484c = i * 8 + j;74857486k = len;74877488if ((c & 0x20) != 0)7489{7490for (k = 0; k < len; k++)7491if (char_list[k] == c - 0x20)7492{7493char_list[k] |= 0x120;7494break;7495}7496}74977498if (k == len)7499{7500if (len >= MAX_CLASS_CHARS_SIZE)7501return FALSE;75027503char_list[len++] = (uint16_t) c;7504}7505}75067507byte >>= 1;7508j++;7509}7510}75117512if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */75137514i = 0;7515j = 0;75167517if (char_list[0] == 0)7518{7519i++;7520OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);7521OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);7522}7523else7524OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);75257526while (i < len)7527{7528if ((char_list[i] & 0x100) != 0)7529j++;7530else7531{7532OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);7533SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);7534}7535i++;7536}75377538if (j != 0)7539{7540OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);75417542for (i = 0; i < len; i++)7543if ((char_list[i] & 0x100) != 0)7544{7545j--;7546OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);7547SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);7548}7549}75507551if (invert)7552nclass = !nclass;75537554type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;7555add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));7556return TRUE;7557}75587559static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)7560{7561/* May destroy TMP1. 
*/7562if (optimize_class_ranges(common, bits, nclass, invert, backtracks))7563return TRUE;7564return optimize_class_chars(common, bits, nclass, invert, backtracks);7565}75667567static void check_anynewline(compiler_common *common)7568{7569/* Check whether TMP1 contains a newline character. TMP2 destroyed. */7570DEFINE_COMPILER;75717572sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);75737574#ifdef EBCDIC7575OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);7576OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);7577OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);7578OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7579OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);7580OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7581OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);7582OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7583OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);7584#else7585OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);7586OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);7587OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);7588OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);7589#endif7590#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327591#if PCRE2_CODE_UNIT_WIDTH == 87592if (common->utf)7593{7594#endif7595OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7596OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);7597OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);7598#if PCRE2_CODE_UNIT_WIDTH == 87599}7600#endif7601#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7602OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);7603OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7604}76057606static void check_hspace(compiler_common *common)7607{7608/* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/7609DEFINE_COMPILER;76107611sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);76127613OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_HT);7614OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);7615OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_SPACE);7616OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7617OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NBSP);7618#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327619#if PCRE2_CODE_UNIT_WIDTH == 87620if (common->utf)7621{7622#endif7623OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7624OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);7625OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7626OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);7627OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7628OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);7629OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200a - 0x2000);7630OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);7631OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);7632OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7633OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);7634OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7635OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);7636#if PCRE2_CODE_UNIT_WIDTH == 87637}7638#endif7639#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7640OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);76417642OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7643}76447645static void check_vspace(compiler_common *common)7646{7647/* Check whether TMP1 contains a newline character. TMP2 destroyed. 
*/7648DEFINE_COMPILER;76497650sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);76517652#ifdef EBCDIC7653OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_LF);7654OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);7655OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_VT);7656OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7657OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_FF);7658OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7659OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR);7660OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7661OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL);7662#else7663OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, CHAR_LF);7664OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR - CHAR_LF);7665OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);7666OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NEL - CHAR_LF);7667#endif7668#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 327669#if PCRE2_CODE_UNIT_WIDTH == 87670if (common->utf)7671{7672#endif7673OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);7674OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);7675OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - CHAR_LF);7676#if PCRE2_CODE_UNIT_WIDTH == 87677}7678#endif7679#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */7680OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);76817682OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);7683}76847685static void do_casefulcmp(compiler_common *common)7686{7687DEFINE_COMPILER;7688struct sljit_jump *jump;7689struct sljit_label *label;7690int char1_reg;7691int char2_reg;76927693if (HAS_VIRTUAL_REGISTERS)7694{7695char1_reg = STR_END;7696char2_reg = STACK_TOP;7697}7698else7699{7700char1_reg = TMP3;7701char2_reg = RETURN_ADDR;7702}77037704/* Update ref_update_local_size() when this changes. 
*/7705SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));7706sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7707OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);77087709if (char1_reg == STR_END)7710{7711OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);7712OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);7713}77147715if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7716{7717label = LABEL();7718sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7719sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7720jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7721OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7722JUMPTO(SLJIT_NOT_ZERO, label);77237724JUMPHERE(jump);7725OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7726}7727else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7728{7729OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7730OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));77317732label = LABEL();7733sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7734sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7735jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7736OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7737JUMPTO(SLJIT_NOT_ZERO, label);77387739JUMPHERE(jump);7740OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7741OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7742}7743else7744{7745label = LABEL();7746OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);7747OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);7748OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, 
SLJIT_IMM, IN_UCHARS(1));7749OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7750jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7751OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));7752JUMPTO(SLJIT_NOT_ZERO, label);77537754JUMPHERE(jump);7755OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);7756}77577758if (char1_reg == STR_END)7759{7760OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);7761OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);7762}77637764OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7765}77667767static void do_caselesscmp(compiler_common *common)7768{7769DEFINE_COMPILER;7770struct sljit_jump *jump;7771struct sljit_label *label;7772int char1_reg = STR_END;7773int char2_reg;7774int lcc_table;7775int opt_type = 0;77767777if (HAS_VIRTUAL_REGISTERS)7778{7779char2_reg = STACK_TOP;7780lcc_table = STACK_LIMIT;7781}7782else7783{7784char2_reg = RETURN_ADDR;7785lcc_table = TMP3;7786}77877788if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7789opt_type = 1;7790else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)7791opt_type = 2;77927793/* Update ref_update_local_size() when this changes. 
*/7794SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));7795sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);7796OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);77977798OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, char1_reg, 0);77997800if (char2_reg == STACK_TOP)7801{7802OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);7803OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);7804}78057806OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);78077808if (opt_type == 1)7809{7810label = LABEL();7811sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7812sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7813}7814else if (opt_type == 2)7815{7816OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7817OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));78187819label = LABEL();7820sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));7821sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7822}7823else7824{7825label = LABEL();7826OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);7827OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);7828OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));7829}78307831#if PCRE2_CODE_UNIT_WIDTH != 87832jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);7833#endif7834OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);7835#if PCRE2_CODE_UNIT_WIDTH != 87836JUMPHERE(jump);7837jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);7838#endif7839OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);7840#if PCRE2_CODE_UNIT_WIDTH != 87841JUMPHERE(jump);7842#endif78437844if (opt_type == 0)7845OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));78467847jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);7848OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, 
SLJIT_IMM, IN_UCHARS(1));7849JUMPTO(SLJIT_NOT_ZERO, label);78507851JUMPHERE(jump);7852OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);78537854if (opt_type == 2)7855OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));78567857if (char2_reg == STACK_TOP)7858{7859OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);7860OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);7861}78627863OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);7864OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);7865}78667867#include "pcre2_jit_char_inc.h"78687869static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)7870{7871DEFINE_COMPILER;7872struct sljit_jump *jump[4];78737874switch(type)7875{7876case OP_SOD:7877if (HAS_VIRTUAL_REGISTERS)7878{7879OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7880OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));7881}7882else7883OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));7884add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));7885return cc;78867887case OP_SOM:7888if (HAS_VIRTUAL_REGISTERS)7889{7890OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);7891OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));7892}7893else7894OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));7895add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));7896return cc;78977898case OP_NOT_WORD_BOUNDARY:7899case OP_WORD_BOUNDARY:7900case OP_NOT_UCP_WORD_BOUNDARY:7901case OP_UCP_WORD_BOUNDARY:7902add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));7903#ifdef SUPPORT_UNICODE7904if (common->invalid_utf)7905{7906add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? 
SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));7907return cc;7908}7909#endif /* SUPPORT_UNICODE */7910sljit_set_current_flags(compiler, SLJIT_SET_Z);7911add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));7912return cc;79137914case OP_EODN:7915/* Requires rather complex checks. */7916jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);7917if (common->nltype == NLTYPE_FIXED && common->newline > 255)7918{7919OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7920OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7921if (common->mode == PCRE2_JIT_COMPLETE)7922add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));7923else7924{7925jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);7926OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);7927OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);7928OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);7929OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);7930add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));7931check_partial(common, TRUE);7932add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));7933JUMPHERE(jump[1]);7934}7935OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7936add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));7937add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));7938}7939else if (common->nltype == NLTYPE_FIXED)7940{7941OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7942OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7943add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));7944add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));7945}7946else7947{7948OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));7949jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, 
SLJIT_IMM, CHAR_CR);7950OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));7951OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);7952jump[2] = JUMP(SLJIT_GREATER);7953add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);7954/* Equal. */7955OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));7956jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);7957add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));79587959JUMPHERE(jump[1]);7960if (common->nltype == NLTYPE_ANYCRLF)7961{7962OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));7963add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));7964add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));7965}7966else7967{7968OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);7969read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);7970add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));7971add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));7972sljit_set_current_flags(compiler, SLJIT_SET_Z);7973add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));7974OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);7975}7976JUMPHERE(jump[2]);7977JUMPHERE(jump[3]);7978}7979JUMPHERE(jump[0]);7980if (common->mode != PCRE2_JIT_COMPLETE)7981check_partial(common, TRUE);7982return cc;79837984case OP_EOD:7985add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));7986if (common->mode != PCRE2_JIT_COMPLETE)7987check_partial(common, TRUE);7988return cc;79897990case OP_DOLL:7991if (HAS_VIRTUAL_REGISTERS)7992{7993OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);7994OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7995}7996else7997OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);7998add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));79998000if 
(!common->endonly)8001compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);8002else8003{8004add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));8005check_partial(common, FALSE);8006}8007return cc;80088009case OP_DOLLM:8010jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);8011if (HAS_VIRTUAL_REGISTERS)8012{8013OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);8014OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);8015}8016else8017OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);8018add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));8019check_partial(common, FALSE);8020jump[0] = JUMP(SLJIT_JUMP);8021JUMPHERE(jump[1]);80228023if (common->nltype == NLTYPE_FIXED && common->newline > 255)8024{8025OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));8026OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));8027if (common->mode == PCRE2_JIT_COMPLETE)8028add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));8029else8030{8031jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);8032/* STR_PTR = STR_END - IN_UCHARS(1) */8033add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));8034check_partial(common, TRUE);8035add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));8036JUMPHERE(jump[1]);8037}80388039OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));8040add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));8041add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));8042}8043else8044{8045peek_char(common, common->nlmax, TMP3, 0, NULL);8046check_newlinechar(common, common->nltype, backtracks, FALSE);8047}8048JUMPHERE(jump[0]);8049return cc;80508051case OP_CIRC:8052if (HAS_VIRTUAL_REGISTERS)8053{8054OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 
0);8055OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));8056add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));8057OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);8058add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));8059}8060else8061{8062OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));8063add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));8064OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);8065add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));8066}8067return cc;80688069case OP_CIRCM:8070/* TMP2 might be used by peek_char_back. */8071if (HAS_VIRTUAL_REGISTERS)8072{8073OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);8074OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));8075jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);8076OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);8077}8078else8079{8080OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));8081jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);8082OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);8083}8084add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));8085jump[0] = JUMP(SLJIT_JUMP);8086JUMPHERE(jump[1]);80878088if (!common->alt_circumflex)8089add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));80908091if (common->nltype == NLTYPE_FIXED && common->newline > 255)8092{8093OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));8094add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));8095OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));8096OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 
IN_UCHARS(-1));8097add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));8098add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));8099}8100else8101{8102peek_char_back(common, common->nlmax, backtracks);8103check_newlinechar(common, common->nltype, backtracks, FALSE);8104}8105JUMPHERE(jump[0]);8106return cc;8107}8108SLJIT_UNREACHABLE();8109return cc;8110}81118112/* Forward definitions. */8113static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);8114static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);81158116#define PUSH_BACKTRACK(size, ccstart, error) \8117do \8118{ \8119backtrack = sljit_alloc_memory(compiler, (size)); \8120if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \8121return error; \8122memset(backtrack, 0, size); \8123backtrack->prev = parent->top; \8124backtrack->cc = (ccstart); \8125parent->top = backtrack; \8126} \8127while (0)81288129#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \8130do \8131{ \8132backtrack = sljit_alloc_memory(compiler, (size)); \8133if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \8134return; \8135memset(backtrack, 0, size); \8136backtrack->prev = parent->top; \8137backtrack->cc = (ccstart); \8138parent->top = backtrack; \8139} \8140while (0)81418142#define BACKTRACK_AS(type) ((type *)backtrack)81438144static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)8145{8146/* The OVECTOR offset goes to TMP2. 
*/8147DEFINE_COMPILER;8148int count = GET2(cc, 1 + IMM2_SIZE);8149PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;8150unsigned int offset;8151jump_list *found = NULL;81528153SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);81548155OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));81568157count--;8158while (count-- > 0)8159{8160offset = GET2(slot, 0) << 1;8161GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));8162add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));8163slot += common->name_entry_size;8164}81658166offset = GET2(slot, 0) << 1;8167GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));8168if (backtracks != NULL && !common->unset_backref)8169add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));81708171set_jumps(found, LABEL());8172}81738174static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)8175{8176DEFINE_COMPILER;8177BOOL ref = (*cc == OP_REF || *cc == OP_REFI);8178int offset = 0;8179struct sljit_jump *jump = NULL;8180struct sljit_jump *partial;8181struct sljit_jump *nopartial;8182#if defined SUPPORT_UNICODE8183struct sljit_label *loop;8184struct sljit_label *caseless_loop;8185struct sljit_jump *turkish_ascii_i = NULL;8186struct sljit_jump *turkish_non_ascii_i = NULL;8187jump_list *no_match = NULL;8188int source_reg = COUNT_MATCH;8189int source_end_reg = ARGUMENTS;8190int char1_reg = STACK_LIMIT;8191PCRE2_UCHAR refi_flag = 0;81928193if (*cc == OP_REFI || *cc == OP_DNREFI)8194refi_flag = cc[PRIV(OP_lengths)[*cc] - 1];8195#endif /* SUPPORT_UNICODE */81968197if (ref)8198{8199offset = GET2(cc, 1) << 1;8200OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8201/* OVECTOR(1) contains the "string begin - 1" constant. 
*/8202if (withchecks && !common->unset_backref)8203add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));8204}8205else8206OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);82078208#if defined SUPPORT_UNICODE8209if ((common->utf || common->ucp) && (*cc == OP_REFI || *cc == OP_DNREFI))8210{8211/* Update ref_update_local_size() when this changes. */8212SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));82138214if (ref)8215OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8216else8217OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));82188219if (withchecks && emptyfail)8220add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));82218222OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, source_reg, 0);8223OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, source_end_reg, 0);8224OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, char1_reg, 0);82258226OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);8227OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);82288229loop = LABEL();8230jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);8231partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);82328233/* Read original character. It must be a valid UTF character. */8234OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);8235OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);82368237read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);82388239OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);8240OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);8241OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);82428243/* Read second character. 
*/8244read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);82458246CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);82478248if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==8249REFI_FLAG_TURKISH_CASING)8250{8251OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x20);8252turkish_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x69);82538254OP2(SLJIT_OR, SLJIT_TMP_DEST_REG, 0, char1_reg, 0, SLJIT_IMM, 0x1);8255turkish_non_ascii_i = CMP(SLJIT_EQUAL, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x131);8256}82578258OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);82598260add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));82618262OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);8263OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);8264OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);82658266OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));82678268OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));8269OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));8270OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);8271CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);82728273add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));8274OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);8275OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));82768277if (refi_flag & REFI_FLAG_CASELESS_RESTRICT)8278add_jump(compiler, &no_match, CMP(SLJIT_LESS | SLJIT_32, SLJIT_MEM1(TMP2), 0, SLJIT_IMM, 128));82798280caseless_loop = LABEL();8281OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);8282OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));8283OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);8284JUMPTO(SLJIT_EQUAL, loop);8285JUMPTO(SLJIT_LESS, caseless_loop);82868287if ((refi_flag & (REFI_FLAG_TURKISH_CASING|REFI_FLAG_CASELESS_RESTRICT)) ==8288REFI_FLAG_TURKISH_CASING)8289{8290add_jump(compiler, &no_match, 
JUMP(SLJIT_JUMP));8291JUMPHERE(turkish_ascii_i);82928293OP2(SLJIT_LSHR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);8294OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8295OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8296OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x130);8297CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);82988299add_jump(compiler, &no_match, JUMP(SLJIT_JUMP));8300JUMPHERE(turkish_non_ascii_i);83018302OP2(SLJIT_AND, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8303OP2(SLJIT_XOR, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 1);8304OP2(SLJIT_SHL, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 5);8305OP2(SLJIT_ADD, char1_reg, 0, char1_reg, 0, SLJIT_IMM, 0x49);8306CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);8307}83088309set_jumps(no_match, LABEL());8310if (common->mode == PCRE2_JIT_COMPLETE)8311JUMPHERE(partial);83128313OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8314OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8315OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);8316add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));83178318if (common->mode != PCRE2_JIT_COMPLETE)8319{8320JUMPHERE(partial);8321OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8322OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8323OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);83248325check_partial(common, FALSE);8326add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));8327}83288329JUMPHERE(jump);8330OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);8331OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);8332OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);8333return;8334}8335else8336#endif /* SUPPORT_UNICODE */8337{8338if (ref)8339OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);8340else8341OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);83428343if (withchecks)8344jump = 
JUMP(SLJIT_ZERO);83458346OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);8347partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);8348if (common->mode == PCRE2_JIT_COMPLETE)8349add_jump(compiler, backtracks, partial);83508351add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));8352add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));83538354if (common->mode != PCRE2_JIT_COMPLETE)8355{8356nopartial = JUMP(SLJIT_JUMP);8357JUMPHERE(partial);8358/* TMP2 -= STR_END - STR_PTR */8359OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);8360OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);8361partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);8362OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);8363add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));8364add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));8365JUMPHERE(partial);8366check_partial(common, FALSE);8367add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));8368JUMPHERE(nopartial);8369}8370}83718372if (jump != NULL)8373{8374if (emptyfail)8375add_jump(compiler, backtracks, jump);8376else8377JUMPHERE(jump);8378}8379}83808381static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)8382{8383DEFINE_COMPILER;8384BOOL ref = (*cc == OP_REF || *cc == OP_REFI);8385backtrack_common *backtrack;8386PCRE2_UCHAR type;8387int local_start = LOCAL2;8388int offset = 0;8389struct sljit_label *label;8390struct sljit_jump *zerolength;8391struct sljit_jump *jump = NULL;8392PCRE2_SPTR ccbegin = cc;8393int min = 0, max = 0;8394BOOL minimize;83958396PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);83978398if (ref)8399offset = GET2(cc, 1) << 1;8400else8401cc += IMM2_SIZE;84028403if (*ccbegin == OP_REFI || *ccbegin == OP_DNREFI)8404{8405cc += 1;8406#ifdef SUPPORT_UNICODE8407if (common->utf || 
common->ucp)8408local_start = LOCAL3;8409#endif8410}84118412type = cc[1 + IMM2_SIZE];84138414SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);8415/* Update ref_update_local_size() when this changes. */8416SLJIT_ASSERT(local_start + 2 * SSIZE_OF(sw) <= (int)LOCAL0 + common->locals_size);8417minimize = (type & 0x1) != 0;8418switch(type)8419{8420case OP_CRSTAR:8421case OP_CRMINSTAR:8422min = 0;8423max = 0;8424cc += 1 + IMM2_SIZE + 1;8425break;8426case OP_CRPLUS:8427case OP_CRMINPLUS:8428min = 1;8429max = 0;8430cc += 1 + IMM2_SIZE + 1;8431break;8432case OP_CRQUERY:8433case OP_CRMINQUERY:8434min = 0;8435max = 1;8436cc += 1 + IMM2_SIZE + 1;8437break;8438case OP_CRRANGE:8439case OP_CRMINRANGE:8440min = GET2(cc, 1 + IMM2_SIZE + 1);8441max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);8442cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;8443break;8444default:8445SLJIT_UNREACHABLE();8446break;8447}84488449if (!minimize)8450{8451if (min == 0)8452{8453allocate_stack(common, 2);8454if (ref)8455OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8456OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8457OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);8458/* Temporary release of STR_PTR. */8459OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));8460/* Handles both invalid and empty cases. Since the minimum repeat,8461is zero the invalid case is basically the same as an empty case. */8462if (ref)8463zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8464else8465{8466compile_dnref_search(common, ccbegin, NULL);8467OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8468OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);8469zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8470}8471/* Restore if not zero length. 
*/8472OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));8473}8474else8475{8476allocate_stack(common, 1);8477if (ref)8478OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8479OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);84808481if (ref)8482{8483if (!common->unset_backref)8484add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));8485zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8486}8487else8488{8489compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);8490OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8491OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw), TMP2, 0);8492zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8493}8494}84958496if (min > 1 || max > 1)8497OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, SLJIT_IMM, 0);84988499label = LABEL();8500if (!ref)8501OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), local_start + SSIZE_OF(sw));8502compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);85038504if (min > 1 || max > 1)8505{8506OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), local_start);8507OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);8508OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), local_start, TMP1, 0);8509if (min > 1)8510CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);8511if (max > 1)8512{8513jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);8514allocate_stack(common, 1);8515OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8516JUMPTO(SLJIT_JUMP, label);8517JUMPHERE(jump);8518}8519}85208521if (max == 0)8522{8523/* Includes min > 1 case as well. 
*/8524allocate_stack(common, 1);8525OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);8526JUMPTO(SLJIT_JUMP, label);8527}85288529JUMPHERE(zerolength);8530BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();85318532count_match(common);8533return cc;8534}85358536allocate_stack(common, ref ? 2 : 3);8537if (ref)8538OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));8539OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);8540if (type != OP_CRMINSTAR)8541OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);85428543if (min == 0)8544{8545/* Handles both invalid and empty cases. Since the minimum repeat,8546is zero the invalid case is basically the same as an empty case. */8547if (ref)8548zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));8549else8550{8551compile_dnref_search(common, ccbegin, NULL);8552OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);8553OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);8554zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));8555}8556/* Length is non-zero, we can match real repeats. 
/* Compiles the matching path of a recursion item.

The opcode at cc is followed by a LINK_SIZE value, read with GET(cc, 1) and
used as an offset from common->start, and by zero or more OP_CREF items.

Arguments:
  common   compiler state; the common->entries list is searched and may
           get a new element appended
  cc       points to the recursion opcode
  parent   not referenced directly in this function; presumably consumed
           by the PUSH_BACKTRACK macro (defined elsewhere) - TODO confirm

Returns:   pointer to the code following the opcode and its OP_CREF items
*/

static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry = common->entries;
recurse_entry *prev = NULL;
PCRE2_SPTR end;
sljit_sw start = GET(cc, 1);
sljit_uw arg_size;
PCRE2_SPTR start_cc;
BOOL needs_control_head;

end = cc + 1 + LINK_SIZE;

/* Skip every OP_CREF item that directly follows the opcode. */
while (*end == OP_CREF)
  end += 1 + IMM2_SIZE;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, end);

/* Inlining simple patterns. When get_framesize reports no_stack for the
target, its body is compiled in place instead of being called. */
if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
  {
  start_cc = common->start + start;
  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return end;
  }

/* Look for an existing entry with the same start offset and the same
argument data (the code units between the link value and end). */
cc += 1 + LINK_SIZE;
arg_size = (sljit_uw)IN_UCHARS(end - cc);
while (entry != NULL)
  {
  if (entry->start == start && entry->arg_size == arg_size
    && (arg_size == 0 || memcmp(cc, entry->arg_start, arg_size) == 0))
    break;
  prev = entry;
  entry = entry->next;
  }

/* Not found: create a new entry and append it to the end of the list. */
if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  /* The returned pointer is not tested; the compiler error state is
  checked instead. */
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return end;
  entry->next = NULL;
  entry->entry_label = NULL;
  entry->backtrack_label = NULL;
  entry->entry_calls = NULL;
  entry->backtrack_calls = NULL;
  entry->start = start;
  entry->arg_start = cc;
  entry->arg_size = arg_size;

  if (prev != NULL)
    prev->next = entry;
  else
    common->entries = entry;
  }

BACKTRACK_AS(recurse_backtrack)->entry = entry;

/* If the entry has no label yet, the call is recorded in entry_calls;
otherwise the call targets the known label directly. */
if (entry->entry_label == NULL)
  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
else
  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
/* Leave if the match is failed (TMP1 is compared with 0 after the call). */
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return end;
}

/* Invokes the user callout function. Its address is passed to
sljit_emit_icall by compile_callout_matchingpath, so it is called from the
generated code.

Arguments:
  arguments      callout, callout_data, begin and end are read from it
  callout_block  block partly filled in by the generated code; the memory
                 directly after the structure is used as the offset vector
  jit_ovector    capture pairs stored as pointers into the subject

Returns:   0 when no callout function is set, otherwise the value returned
           by the callout function
*/

static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *ovector;
sljit_u32 oveccount, capture_top;

if (arguments->callout == NULL)
  return 0;

SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);

begin = arguments->begin;
ovector = (PCRE2_SIZE*)(callout_block + 1);
/* On entry capture_top holds the number of pairs (stored by the generated
code); it is recomputed below. */
oveccount = callout_block->capture_top;

SLJIT_ASSERT(oveccount >= 1);

callout_block->version = 2;
callout_block->callout_flags = 0;

/* Offsets in subject. On entry offset_vector holds a subject pointer (the
generated code stores STR_PTR there), so it must be read before the field
is overwritten below. */
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = jit_ovector[0] - begin;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
callout_block->subject = begin;

/* Convert and copy the JIT offset vector to the ovector array. */
callout_block->capture_top = 1;
callout_block->offset_vector = ovector;

/* Pair 0 is always reported as unset. */
ovector[0] = PCRE2_UNSET;
ovector[1] = PCRE2_UNSET;
ovector += 2;
jit_ovector += 2;
capture_top = 1;

/* Convert pointers to sizes. capture_top ends up as one more than the
index of the last pair whose first value is not PCRE2_UNSET. */
while (--oveccount != 0)
  {
  capture_top++;

  ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
  ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);

  if (ovector[0] != PCRE2_UNSET)
    callout_block->capture_top = capture_top;

  ovector += 2;
  jit_ovector += 2;
  }

return (arguments->callout)(callout_block, arguments->callout_data);
}

/* Offset of a member inside pcre2_callout_block. */
#define CALLOUT_ARG_OFFSET(arg) \
    SLJIT_OFFSETOF(pcre2_callout_block, arg)

/* Compiles the matching path of a callout item. Items whose opcode is
OP_CALLOUT carry a callout number; for any other opcode reaching this
function the string related fields are filled in instead.

The generated code reserves a pcre2_callout_block followed by room for
(top_bracket + 1) pairs with allocate_stack, fills in the block, calls
do_callout_jit and releases the area with free_stack.

Arguments:
  common   compiler state
  cc       points to the callout opcode
  parent   not referenced directly in this function; presumably consumed
           by the PUSH_BACKTRACK macro (defined elsewhere) - TODO confirm

Returns:   cc advanced by the length of the callout item
*/

static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
sljit_s32 mov_opcode;
unsigned int callout_length = (*cc == OP_CALLOUT)
    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
sljit_s32 callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); /* top_bracket is uint16 so maximum is 1MiB */

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

/* Add the size of the block itself and convert the byte size to a number
of sljit_sw units, rounding up. */
callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);

allocate_stack(common, callout_arg_size);

SLJIT_ASSERT(common->capture_last_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
/* The callout number is only present for OP_CALLOUT. */
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);

/* These pointer sized fields temporarily store internal variables;
do_callout_jit reads them before writing the final values. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);

if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
/* Fields written with mov_opcode use a 32 bit move when PCRE2_SIZE is
4 bytes long. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));

/* value1, value2 and value3 become callout_string, callout_string_length
and callout_string_offset; all three are zero for OP_CALLOUT. */
if (*cc == OP_CALLOUT)
  {
  value1 = 0;
  value2 = 0;
  value3 = 0;
  }
else
  {
  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
/* mark is the value loaded into TMP2 above, or 0 when mark_ptr is 0. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);

SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

/* Needed to save important temporary registers. STR_PTR shares its
register with the second call argument (see the assert above), so it is
kept in LOCAL0 across the call. */
SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
free_stack(common, callout_arg_size);

/* Check return value: a value greater than zero backtracks, any other
non-zero value jumps to the abort handler, zero falls through. */
OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label == NULL)
  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
else
  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
return cc + callout_length;
}

/* NOTE(review): no #define of CALLOUT_ARG_SIZE is visible in this part of
the file; confirm whether the following #undef is still needed. */
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET

/* Compiles OP_REVERSE and OP_VREVERSE. The generated code moves STR_PTR
backwards and fails when the start of the subject (jit_arguments.begin)
would be passed while moving back the mandatory part.

OP_REVERSE has a single count (lmin == lmax). OP_VREVERSE has a minimum
and a maximum; for it the position after the mandatory part is stored in
STACK(3) and the final position in STACK(2). These stack slots are not
allocated here; presumably the caller reserves them - TODO confirm.

Arguments:
  common   compiler state
  cc       points to OP_REVERSE or OP_VREVERSE
  parent   enclosing backtrack, parent->top must be NULL; for OP_REVERSE
           failures are added to parent->own_backtracks

Returns:   pointer to the code following the opcode and its count(s)
*/

static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack = NULL;
jump_list **reverse_failed;
unsigned int lmin, lmax;
#ifdef SUPPORT_UNICODE
struct sljit_jump *jump;
struct sljit_label *label;
#endif

SLJIT_ASSERT(parent->top == NULL);

if (*cc == OP_REVERSE)
  {
  reverse_failed = &parent->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = lmin;
  cc += 1 + IMM2_SIZE;

  SLJIT_ASSERT(lmin > 0);
  }
else
  {
  SLJIT_ASSERT(*cc == OP_VREVERSE);
  PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, cc + 1 + 2 * IMM2_SIZE);

  /* Only this branch has its own backtrack structure. */
  reverse_failed = &backtrack->own_backtracks;
  lmin = GET2(cc, 1);
  lmax = GET2(cc, 1 + IMM2_SIZE);
  cc += 1 + 2 * IMM2_SIZE;

  SLJIT_ASSERT(lmin < lmax);
  }

/* TMP2 = start of the subject. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#ifdef SUPPORT_UNICODE
if (common->utf)
  {
  /* In UTF mode move_back is called once per loop iteration, with TMP3 as
  the counter. */
  if (lmin > 0)
    {
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
    label = LABEL();
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* The optional part: reaching the start of the subject leaves the
    loop instead of failing. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
    label = LABEL();
    jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    move_back(common, reverse_failed, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);

    JUMPHERE(jump);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }
else
#endif
  {
  /* Without UTF the counts are converted with IN_UCHARS and subtracted
  from STR_PTR directly. */
  if (lmin > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
    add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }

  if (lmin < lmax)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);

    /* Presumably SELECT replaces STR_PTR with TMP2 when the comparison
    set the LESS flag, i.e. the position is limited to the start of the
    subject - TODO confirm against the SELECT macro. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
    SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);

    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
    }
  }

check_start_used_ptr(common);

/* lmin < lmax only holds for OP_VREVERSE, where backtrack was set above. */
if (lmin < lmax)
  BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();

return cc;
}

/* Scans the opcodes starting at cc up to OP_KET.

Returns FALSE when OP_KET is reached after seeing only the opcodes listed
in the switch below, and TRUE as soon as any other opcode is found.
*/

static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
while (TRUE)
  {
  switch (*cc)
    {
    /* The length of this item is stored in the item itself. */
    case OP_CALLOUT_STR:
    cc += GET(cc, 1 + 2*LINK_SIZE);
    break;

    /* Fixed length items, skipped using the OP_lengths table. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    case OP_NOT_UCP_WORD_BOUNDARY:
    case OP_UCP_WORD_BOUNDARY:
    cc += PRIV(OP_lengths)[*cc];
    break;

    case OP_KET:
    return FALSE;

    default:
    return TRUE;
    }
  }
}
has_vreverse;8951int private_data_ptr;8952backtrack_common altbacktrack;8953PCRE2_SPTR ccbegin;8954PCRE2_UCHAR opcode;8955PCRE2_UCHAR bra = OP_BRA;8956jump_list *tmp = NULL;8957jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;8958jump_list **found;8959/* Saving previous accept variables. */8960BOOL save_local_quit_available = common->local_quit_available;8961BOOL save_in_positive_assertion = common->in_positive_assertion;8962sljit_s32 save_restore_end_ptr = common->restore_end_ptr;8963then_trap_backtrack *save_then_trap = common->then_trap;8964struct sljit_label *save_quit_label = common->quit_label;8965struct sljit_label *save_accept_label = common->accept_label;8966jump_list *save_quit = common->quit;8967jump_list *save_positive_assertion_quit = common->positive_assertion_quit;8968jump_list *save_accept = common->accept;8969struct sljit_jump *jump;8970struct sljit_jump *brajump = NULL;89718972/* Assert captures then. */8973common->then_trap = NULL;89748975if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)8976{8977SLJIT_ASSERT(!conditional);8978bra = *cc;8979cc++;8980}89818982private_data_ptr = PRIVATE_DATA(cc);8983SLJIT_ASSERT(private_data_ptr != 0);8984framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);8985backtrack->framesize = framesize;8986backtrack->private_data_ptr = private_data_ptr;8987opcode = *cc;8988SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);8989found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;8990ccbegin = cc;8991cc += GET(cc, 1);89928993if (bra == OP_BRAMINZERO)8994{8995/* This is a braminzero backtrack path. 
*/8996OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));8997free_stack(common, 1);8998brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);8999}90009001if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))9002end_block_size = 3;90039004if (framesize < 0)9005{9006extrasize = 1;9007if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))9008extrasize = 0;90099010extrasize += end_block_size;90119012if (needs_control_head)9013extrasize++;90149015if (framesize == no_frame)9016OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);90179018if (extrasize > 0)9019allocate_stack(common, extrasize);90209021if (needs_control_head)9022OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);90239024if (extrasize > 0)9025OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);90269027if (needs_control_head)9028{9029SLJIT_ASSERT(extrasize == end_block_size + 2);9030OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);9031OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);9032}9033}9034else9035{9036extrasize = (needs_control_head ? 
3 : 2) + end_block_size;90379038OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);9039allocate_stack(common, framesize + extrasize);90409041OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9042OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);9043if (needs_control_head)9044OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);9045OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);90469047if (needs_control_head)9048{9049OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);9050OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);9051OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);9052}9053else9054OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);90559056init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);9057}90589059if (end_block_size > 0)9060{9061OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);9062OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);9063}90649065memset(&altbacktrack, 0, sizeof(backtrack_common));9066if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))9067{9068/* Control verbs cannot escape from these asserts. 
*/9069local_quit_available = TRUE;9070common->restore_end_ptr = 0;9071common->local_quit_available = TRUE;9072common->quit_label = NULL;9073common->quit = NULL;9074}90759076common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);9077common->positive_assertion_quit = NULL;90789079while (1)9080{9081common->accept_label = NULL;9082common->accept = NULL;9083altbacktrack.top = NULL;9084altbacktrack.own_backtracks = NULL;90859086if (*ccbegin == OP_ALT && extrasize > 0)9087OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));90889089altbacktrack.cc = ccbegin;9090ccbegin += 1 + LINK_SIZE;90919092has_vreverse = (*ccbegin == OP_VREVERSE);9093if (*ccbegin == OP_REVERSE || has_vreverse)9094ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);90959096compile_matchingpath(common, ccbegin, cc, &altbacktrack);9097if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))9098{9099if (local_quit_available)9100{9101common->local_quit_available = save_local_quit_available;9102common->quit_label = save_quit_label;9103common->quit = save_quit;9104}9105common->in_positive_assertion = save_in_positive_assertion;9106common->restore_end_ptr = save_restore_end_ptr;9107common->then_trap = save_then_trap;9108common->accept_label = save_accept_label;9109common->positive_assertion_quit = save_positive_assertion_quit;9110common->accept = save_accept;9111return NULL;9112}91139114if (has_vreverse)9115{9116SLJIT_ASSERT(altbacktrack.top != NULL);9117add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));9118}91199120common->accept_label = LABEL();9121if (common->accept != NULL)9122set_jumps(common->accept, common->accept_label);91239124/* Reset stack. 
*/9125if (framesize < 0)9126{9127if (framesize == no_frame)9128OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9129else if (extrasize > 0)9130free_stack(common, extrasize);91319132if (end_block_size > 0)9133OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));91349135if (needs_control_head)9136OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));9137}9138else9139{9140if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)9141{9142/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */9143OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));91449145if (end_block_size > 0)9146OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));91479148if (needs_control_head)9149OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));9150}9151else9152{9153OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);91549155if (end_block_size > 0)9156OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));91579158if (needs_control_head)9159OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));9160add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));9161OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));9162}9163}91649165if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)9166{9167/* We know that STR_PTR was stored on the top of the stack. */9168if (conditional)9169{9170if (extrasize > 0)9171OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 
2 : 1)));9172}9173else if (bra == OP_BRAZERO)9174{9175if (framesize < 0)9176OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));9177else9178{9179OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));9180OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));9181OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9182}9183OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9184OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9185}9186else if (framesize >= 0)9187{9188/* For OP_BRA and OP_BRAMINZERO. */9189OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));9190}9191}9192add_jump(compiler, found, JUMP(SLJIT_JUMP));91939194compile_backtrackingpath(common, altbacktrack.top);9195if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))9196{9197if (local_quit_available)9198{9199common->local_quit_available = save_local_quit_available;9200common->quit_label = save_quit_label;9201common->quit = save_quit;9202}9203common->in_positive_assertion = save_in_positive_assertion;9204common->restore_end_ptr = save_restore_end_ptr;9205common->then_trap = save_then_trap;9206common->accept_label = save_accept_label;9207common->positive_assertion_quit = save_positive_assertion_quit;9208common->accept = save_accept;9209return NULL;9210}9211set_jumps(altbacktrack.own_backtracks, LABEL());92129213if (*cc != OP_ALT)9214break;92159216ccbegin = cc;9217cc += GET(cc, 1);9218}92199220if (local_quit_available)9221{9222SLJIT_ASSERT(common->positive_assertion_quit == NULL);9223/* Makes the check less complicated below. */9224common->positive_assertion_quit = common->quit;9225}92269227/* None of them matched. 
*/9228if (common->positive_assertion_quit != NULL)9229{9230jump = JUMP(SLJIT_JUMP);9231set_jumps(common->positive_assertion_quit, LABEL());9232SLJIT_ASSERT(framesize != no_stack);9233if (framesize < 0)9234OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));9235else9236{9237OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9238add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));9239OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));9240}9241JUMPHERE(jump);9242}92439244if (end_block_size > 0)9245OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));92469247if (needs_control_head)9248OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));92499250if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)9251{9252/* Assert is failed. */9253if ((conditional && extrasize > 0) || bra == OP_BRAZERO)9254OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));92559256if (framesize < 0)9257{9258/* The topmost item should be 0. */9259if (bra == OP_BRAZERO)9260{9261if (extrasize >= 2)9262free_stack(common, extrasize - 1);9263OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9264}9265else if (extrasize > 0)9266free_stack(common, extrasize);9267}9268else9269{9270OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));9271/* The topmost item should be 0. */9272if (bra == OP_BRAZERO)9273{9274free_stack(common, framesize + extrasize - 1);9275OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9276}9277else9278free_stack(common, framesize + extrasize);9279OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9280}9281jump = JUMP(SLJIT_JUMP);9282if (bra != OP_BRAZERO)9283add_jump(compiler, target, jump);92849285/* Assert is successful. */9286set_jumps(tmp, LABEL());9287if (framesize < 0)9288{9289/* We know that STR_PTR was stored on the top of the stack. 
*/9290if (extrasize > 0)9291OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));92929293/* Keep the STR_PTR on the top of the stack. */9294if (bra == OP_BRAZERO)9295{9296/* This allocation is always successful. */9297OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9298if (extrasize >= 2)9299OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);9300}9301else if (bra == OP_BRAMINZERO)9302{9303OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));9304OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9305}9306}9307else9308{9309if (bra == OP_BRA)9310{9311/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */9312OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));9313OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));9314}9315else9316{9317/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */9318OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));93199320if (extrasize == 2 + end_block_size)9321{9322OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9323if (bra == OP_BRAMINZERO)9324OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9325}9326else9327{9328SLJIT_ASSERT(extrasize == 3 + end_block_size);9329OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));9330OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? 
STR_PTR : SLJIT_IMM, 0);9331}9332}9333}93349335if (bra == OP_BRAZERO)9336{9337backtrack->matchingpath = LABEL();9338SET_LABEL(jump, backtrack->matchingpath);9339}9340else if (bra == OP_BRAMINZERO)9341{9342JUMPTO(SLJIT_JUMP, backtrack->matchingpath);9343JUMPHERE(brajump);9344SLJIT_ASSERT(framesize != 0);9345if (framesize > 0)9346{9347OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9348add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));9349OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));9350OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));9351OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);9352}9353set_jumps(backtrack->common.own_backtracks, LABEL());9354}9355}9356else9357{9358/* AssertNot is successful. */9359if (framesize < 0)9360{9361if (extrasize > 0)9362OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));93639364if (bra != OP_BRA)9365{9366if (extrasize >= 2)9367free_stack(common, extrasize - 1);9368OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);9369}9370else if (extrasize > 0)9371free_stack(common, extrasize);9372}9373else9374{9375OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9376OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));9377/* The topmost item should be 0. 
/* Emits code that resets STACK_TOP and, when requested, restores
common->control_head_ptr from a value stored on the stack.

Arguments:
  common              compiler state
  ket                 closing opcode; OP_KETRMAX and OP_KETRMIN get extra
                      handling
  framesize           a negative value selects the first branch (no_frame
                      is one such value); otherwise it is used to compute
                      the new STACK_TOP
  private_data_ptr    local slot addressed through SLJIT_SP
  has_alternatives    together with ket decides whether one more stack
                      item is counted
  needs_control_head  when TRUE, a value is loaded from the stack into TMP1
                      and written to common->control_head_ptr

For OP_KETRMAX the generated code also leaves a stack value in TMP2.
*/

static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
int stacksize;

if (framesize < 0)
  {
  if (framesize == no_frame)
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  else
    {
    /* One item for the control head and one more unless this is a plain
    OP_KET without alternatives. */
    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    if (stacksize > 0)
      free_stack(common, stacksize);
    }

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));

  /* TMP2, which is set here, is used by OP_KETRMAX below. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
  else if (ket == OP_KETRMIN)
    {
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  }
else
  {
  /* STACK_TOP is computed from the value stored at private_data_ptr. */
  stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  if (ket == OP_KETRMAX)
    {
    /* TMP2, which is set here, is used by OP_KETRMAX below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  }
/* TMP1 was loaded in one of the branches above. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}

/* Emits code that saves the previous capture related values on the stack
and stores the new ones.

When common->capture_last_ptr is non-zero, the old value of that local is
saved in STACK(stacksize) and replaced by offset >> 1. When the bracket is
not an optimized one, OVECTOR(offset) and OVECTOR(offset + 1) are saved in
the next two stack slots, then OVECTOR(offset) receives the value stored
at private_data_ptr and OVECTOR(offset + 1) receives STR_PTR.

The stack slots are only written here; presumably the caller has already
allocated them - TODO confirm.

Arguments:
  common            compiler state
  stacksize         index of the first stack slot to use
  offset            ovector offset of the pair (offset >> 1 is passed to
                    is_optimized_cbracket)
  private_data_ptr  local slot holding the value stored to OVECTOR(offset)

Returns:   stacksize increased by the number of slots written
*/

static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;

if (common->capture_last_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  stacksize++;
  }
if (!is_optimized_cbracket(common, offset >> 1))
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
  /* TMP1 is reused for the value stored at private_data_ptr. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
  stacksize += 2;
  }
return stacksize;
}

/* Calls PRIV(script_run) with FALSE as its last argument. Its address is
passed to sljit_emit_icall by match_script_run_common.

Returns:   endptr when PRIV(script_run) returns a non-zero value,
           NULL otherwise
*/

static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
if (PRIV(script_run)(ptr, endptr, FALSE))
  return endptr;
return NULL;
}

#ifdef SUPPORT_UNICODE

/* Same as do_script_run, but passes TRUE as the last argument of
PRIV(script_run). match_script_run_common selects it when common->utf
is set.
*/

static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
{
if (PRIV(script_run)(ptr, endptr, TRUE))
  return endptr;
return NULL;
}

#endif /* SUPPORT_UNICODE */

/* Emits a call to do_script_run (or do_script_run_utf when common->utf is
set and SUPPORT_UNICODE is defined). The value stored at private_data_ptr
and STR_PTR are the two arguments. The result is copied to STR_PTR, and a
zero result jumps to the backtrack list.

Arguments:
  common            compiler state
  private_data_ptr  local slot holding the first argument of the call
  parent            the failure jump is added to
                    parent->top->simple_backtracks when parent->top is not
                    NULL, otherwise to parent->own_backtracks
*/

static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;

/* The two call arguments are taken from TMP1 and STR_PTR. */
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
#ifdef SUPPORT_UNICODE
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
  common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
#endif

OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
Can be empty, or anything.
  C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
  L - Push the previous local (pointed by localptr) to the stack
 () - optional values stored on the stack
()* - optional, can be stored multiple times

The following list shows the regular expression templates, their PCRE byte codes
and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*


Stack layout naming characters:
  A - Push the alternative index (starting from 0) on the stack.
      Not pushed if there are no alternatives.
  M - Any values pushed by the current alternative. Can be empty, or anything.

The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...
[stack trace] M A9579Or nothing, if trace is unnecessary9580*/95819582static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)9583{9584DEFINE_COMPILER;9585backtrack_common *backtrack;9586PCRE2_UCHAR opcode;9587int private_data_ptr = 0;9588int offset = 0;9589int i, stacksize;9590int repeat_ptr = 0, repeat_length = 0;9591int repeat_type = 0, repeat_count = 0;9592PCRE2_SPTR ccbegin;9593PCRE2_SPTR matchingpath;9594PCRE2_SPTR slot;9595PCRE2_UCHAR bra = OP_BRA;9596PCRE2_UCHAR ket;9597assert_backtrack *assert;9598BOOL has_alternatives;9599BOOL needs_control_head = FALSE;9600BOOL has_vreverse = FALSE;9601struct sljit_jump *jump;9602struct sljit_jump *skip;9603jump_list *jumplist;9604struct sljit_label *rmax_label = NULL;9605struct sljit_jump *braminzero = NULL;96069607PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);96089609if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)9610{9611bra = *cc;9612cc++;9613opcode = *cc;9614}96159616opcode = *cc;9617ccbegin = cc;9618matchingpath = bracketend(cc) - 1 - LINK_SIZE;9619ket = *matchingpath;9620if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)9621{9622repeat_ptr = PRIVATE_DATA(matchingpath);9623repeat_length = PRIVATE_DATA(matchingpath + 1);9624repeat_type = PRIVATE_DATA(matchingpath + 2);9625repeat_count = PRIVATE_DATA(matchingpath + 3);9626SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);9627if (repeat_type == OP_UPTO)9628ket = OP_KETRMAX;9629if (repeat_type == OP_MINUPTO)9630ket = OP_KETRMIN;9631}96329633matchingpath = ccbegin + 1 + LINK_SIZE;9634SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);9635SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));9636cc += GET(cc, 1);96379638has_alternatives = *cc == OP_ALT;9639if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))9640{9641SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == 
OP_RREF + 3,9642compile_time_checks_must_be_grouped_together);9643has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;9644}96459646if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))9647opcode = OP_SCOND;96489649if (opcode == OP_CBRA || opcode == OP_SCBRA)9650{9651/* Capturing brackets has a pre-allocated space. */9652offset = GET2(ccbegin, 1 + LINK_SIZE);9653if (!is_optimized_cbracket(common, offset))9654{9655private_data_ptr = OVECTOR_PRIV(offset);9656offset <<= 1;9657}9658else9659{9660offset <<= 1;9661private_data_ptr = OVECTOR(offset);9662}9663BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;9664matchingpath += IMM2_SIZE;9665}9666else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE9667|| opcode == OP_ASSERT_SCS || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)9668{9669/* Other brackets simply allocate the next entry. */9670private_data_ptr = PRIVATE_DATA(ccbegin);9671SLJIT_ASSERT(private_data_ptr != 0);9672BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;9673if (opcode == OP_ONCE)9674BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);9675}96769677/* Instructions before the first alternative. */9678stacksize = 0;9679if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))9680stacksize++;9681if (bra == OP_BRAZERO)9682stacksize++;96839684if (stacksize > 0)9685allocate_stack(common, stacksize);96869687stacksize = 0;9688if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))9689{9690OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);9691stacksize++;9692}96939694if (bra == OP_BRAZERO)9695OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);96969697if (bra == OP_BRAMINZERO)9698{9699/* This is a backtrack path! 
(Since the try-path of OP_BRAMINZERO matches to the empty string) */9700OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));9701if (ket != OP_KETRMIN)9702{9703free_stack(common, 1);9704braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9705}9706else if (opcode == OP_ONCE || opcode >= OP_SBRA)9707{9708jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9709OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));9710/* Nothing stored during the first run. */9711skip = JUMP(SLJIT_JUMP);9712JUMPHERE(jump);9713/* Checking zero-length iteration. */9714if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)9715{9716/* When we come from outside, private_data_ptr contains the previous STR_PTR. */9717braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9718}9719else9720{9721/* Except when the whole stack frame must be saved. */9722OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9723braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));9724}9725JUMPHERE(skip);9726}9727else9728{9729jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);9730OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));9731JUMPHERE(jump);9732}9733}97349735if (repeat_type != 0)9736{9737OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);9738if (repeat_type == OP_EXACT)9739rmax_label = LABEL();9740}97419742if (ket == OP_KETRMIN)9743BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();97449745if (ket == OP_KETRMAX)9746{9747rmax_label = LABEL();9748if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)9749BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;9750}97519752/* Handling capturing brackets and alternatives. 
*/9753if (opcode == OP_ONCE)9754{9755stacksize = 0;9756if (needs_control_head)9757{9758OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);9759stacksize++;9760}97619762if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)9763{9764/* Neither capturing brackets nor recursions are found in the block. */9765if (ket == OP_KETRMIN)9766{9767stacksize += 2;9768if (!needs_control_head)9769OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9770}9771else9772{9773if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)9774OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);9775if (ket == OP_KETRMAX || has_alternatives)9776stacksize++;9777}97789779if (stacksize > 0)9780allocate_stack(common, stacksize);97819782stacksize = 0;9783if (needs_control_head)9784{9785stacksize++;9786OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9787}97889789if (ket == OP_KETRMIN)9790{9791if (needs_control_head)9792OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9793OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9794if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)9795OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));9796OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);9797}9798else if (ket == OP_KETRMAX || has_alternatives)9799OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9800}9801else9802{9803if (ket != OP_KET || has_alternatives)9804stacksize++;98059806stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;9807allocate_stack(common, stacksize);98089809if (needs_control_head)9810OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);98119812OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9813OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));98149815stacksize = needs_control_head ? 
1 : 0;9816if (ket != OP_KET || has_alternatives)9817{9818OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);9819OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);9820stacksize++;9821OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9822}9823else9824{9825OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);9826OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);9827}9828init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);9829}9830}9831else if (opcode == OP_CBRA || opcode == OP_SCBRA)9832{9833/* Saving the previous values. */9834if (is_optimized_cbracket(common, offset >> 1))9835{9836SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));9837allocate_stack(common, 2);9838OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9839OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9840OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9841OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);9842OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);9843}9844else9845{9846OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9847allocate_stack(common, 1);9848OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9849OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9850}9851}9852else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))9853{9854OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9855allocate_stack(common, 4);9856OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9857OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9858OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9859OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);9860OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);9861OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 
0);98629863has_vreverse = (*matchingpath == OP_VREVERSE);9864if (*matchingpath == OP_REVERSE || has_vreverse)9865matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);9866}9867else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)9868{9869/* Saving the previous value. */9870OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9871allocate_stack(common, 1);9872OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9873OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);98749875if (*matchingpath == OP_REVERSE)9876matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);9877}9878else if (opcode == OP_ASSERT_SCS)9879{9880/* Nested scs blocks will not update this variable. */9881if (common->restore_end_ptr == 0)9882common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);98839884if (*matchingpath == OP_CREF && (matchingpath[1 + IMM2_SIZE] != OP_CREF && matchingpath[1 + IMM2_SIZE] != OP_DNCREF))9885{9886/* Optimized case for a single capture reference. */9887i = OVECTOR(GET2(matchingpath, 1) << 1);98889889OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), i);98909891add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));9892matchingpath += 1 + IMM2_SIZE;98939894allocate_stack(common, has_alternatives ? 
3 : 2);98959896OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9897OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9898OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9899OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), i + sizeof(sljit_sw));9900OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9901OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);9902}9903else9904{9905OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));9906jumplist = NULL;99079908while (TRUE)9909{9910if (*matchingpath == OP_CREF)9911{9912sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(matchingpath, 1) << 1));9913matchingpath += 1 + IMM2_SIZE;9914}9915else9916{9917SLJIT_ASSERT(*matchingpath == OP_DNCREF);99189919i = GET2(matchingpath, 1 + IMM2_SIZE);9920slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;99219922while (i-- > 1)9923{9924sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));9925add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));9926slot += common->name_entry_size;9927}99289929sljit_get_local_base(compiler, TMP2, 0, OVECTOR(GET2(slot, 0) << 1));9930matchingpath += 1 + 2 * IMM2_SIZE;9931}99329933if (*matchingpath != OP_CREF && *matchingpath != OP_DNCREF)9934break;99359936add_jump(compiler, &jumplist, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));9937}99389939add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),9940CMP(SLJIT_EQUAL, SLJIT_MEM1(TMP2), 0, TMP1, 0));99419942set_jumps(jumplist, LABEL());99439944allocate_stack(common, has_alternatives ? 
3 : 2);99459946OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);9947OP1(SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));9948OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);9949OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), 0);9950OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);9951OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));9952}99539954OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);9955OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_TMP_DEST_REG, 0);99569957if (has_alternatives)9958OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);9959}9960else if (has_alternatives)9961{9962/* Pushing the starting string pointer. */9963allocate_stack(common, 1);9964OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);9965}99669967/* Generating code for the first alternative. */9968if (opcode == OP_COND || opcode == OP_SCOND)9969{9970if (*matchingpath == OP_CREF)9971{9972SLJIT_ASSERT(has_alternatives);9973add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture),9974CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));9975matchingpath += 1 + IMM2_SIZE;9976}9977else if (*matchingpath == OP_DNCREF)9978{9979SLJIT_ASSERT(has_alternatives);99809981i = GET2(matchingpath, 1 + IMM2_SIZE);9982slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;9983OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);9984OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));9985OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);9986slot += common->name_entry_size;9987i--;9988while (i-- > 0)9989{9990OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);9991OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);9992slot += common->name_entry_size;9993}9994OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 
0);9995add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.no_capture), JUMP(SLJIT_ZERO));9996matchingpath += 1 + 2 * IMM2_SIZE;9997}9998else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)9999{10000/* Never has other case. */10001BACKTRACK_AS(bracket_backtrack)->u.no_capture = NULL;10002SLJIT_ASSERT(!has_alternatives);1000310004if (*matchingpath == OP_TRUE)10005{10006stacksize = 1;10007matchingpath++;10008}10009else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)10010stacksize = 0;10011else if (*matchingpath == OP_RREF)10012{10013stacksize = GET2(matchingpath, 1);10014if (common->currententry == NULL)10015stacksize = 0;10016else if (stacksize == RREF_ANY)10017stacksize = 1;10018else if (common->currententry->start == 0)10019stacksize = stacksize == 0;10020else10021stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);1002210023if (stacksize != 0)10024matchingpath += 1 + IMM2_SIZE;10025}10026else10027{10028if (common->currententry == NULL || common->currententry->start == 0)10029stacksize = 0;10030else10031{10032stacksize = GET2(matchingpath, 1 + IMM2_SIZE);10033slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;10034i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);10035while (stacksize > 0)10036{10037if ((int)GET2(slot, 0) == i)10038break;10039slot += common->name_entry_size;10040stacksize--;10041}10042}1004310044if (stacksize != 0)10045matchingpath += 1 + 2 * IMM2_SIZE;10046}1004710048/* The stacksize == 0 is a common "else" case. */10049if (stacksize == 0)10050{10051if (*cc == OP_ALT)10052{10053matchingpath = cc + 1 + LINK_SIZE;10054cc += GET(cc, 1);10055}10056else10057matchingpath = cc;10058}10059}10060else10061{10062SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);10063/* Similar code as PUSH_BACKTRACK macro. 
*/10064assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));10065if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))10066return NULL;10067memset(assert, 0, sizeof(assert_backtrack));10068assert->common.cc = matchingpath;10069BACKTRACK_AS(bracket_backtrack)->u.assert = assert;10070matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);10071if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))10072return NULL;10073}10074}1007510076compile_matchingpath(common, matchingpath, cc, backtrack);10077if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))10078return NULL;1007910080switch (opcode)10081{10082case OP_ASSERTBACK_NA:10083if (has_vreverse)10084{10085SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));10086add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));10087}1008810089if (PRIVATE_DATA(ccbegin + 1))10090OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));10091break;10092case OP_ONCE:10093match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);10094break;10095case OP_SCRIPT_RUN:10096match_script_run_common(common, private_data_ptr, backtrack);10097break;10098}1009910100stacksize = 0;10101if (repeat_type == OP_MINUPTO)10102{10103/* We need to preserve the counter. TMP2 will be used below. */10104OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);10105stacksize++;10106}10107if (ket != OP_KET || bra != OP_BRA)10108stacksize++;10109if (offset != 0)10110{10111if (common->capture_last_ptr != 0)10112stacksize++;10113if (!is_optimized_cbracket(common, offset >> 1))10114stacksize += 2;10115}10116if (has_alternatives && opcode != OP_ONCE)10117stacksize++;1011810119if (stacksize > 0)10120allocate_stack(common, stacksize);1012110122stacksize = 0;10123if (repeat_type == OP_MINUPTO)10124{10125/* TMP2 was set above. 
*/10126OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);10127stacksize++;10128}1012910130if (ket != OP_KET || bra != OP_BRA)10131{10132if (ket != OP_KET)10133OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);10134else10135OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);10136stacksize++;10137}1013810139if (offset != 0)10140stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);1014110142/* Skip and count the other alternatives. */10143i = 1;10144while (*cc == OP_ALT)10145{10146cc += GET(cc, 1);10147i++;10148}1014910150if (has_alternatives)10151{10152if (opcode != OP_ONCE)10153{10154if (i <= 3)10155OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);10156else10157BACKTRACK_AS(bracket_backtrack)->matching_mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));10158}10159if (ket != OP_KETRMAX)10160BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();10161}1016210163/* Must be after the matchingpath label. */10164if (offset != 0 && is_optimized_cbracket(common, offset >> 1))10165{10166SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));10167OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);10168}10169else switch (opcode)10170{10171case OP_ASSERT_NA:10172OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10173break;10174case OP_ASSERT_SCS:10175OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);10176OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10177OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));10178OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);1017910180/* Nested scs blocks will not update this variable. 
*/10181if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))10182common->restore_end_ptr = 0;10183break;10184}1018510186if (ket == OP_KETRMAX)10187{10188if (repeat_type != 0)10189{10190if (has_alternatives)10191BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();10192OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);10193JUMPTO(SLJIT_NOT_ZERO, rmax_label);10194/* Drop STR_PTR for greedy plus quantifier. */10195if (opcode != OP_ONCE)10196free_stack(common, 1);10197}10198else if (opcode < OP_BRA || opcode >= OP_SBRA)10199{10200if (has_alternatives)10201BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();1020210203/* Checking zero-length iteration. */10204if (opcode != OP_ONCE)10205{10206/* This case includes opcodes such as OP_SCRIPT_RUN. */10207CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);10208/* Drop STR_PTR for greedy plus quantifier. */10209if (bra != OP_BRAZERO)10210free_stack(common, 1);10211}10212else10213/* TMP2 must contain the starting STR_PTR. */10214CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);10215}10216else10217JUMPTO(SLJIT_JUMP, rmax_label);10218BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();10219}1022010221if (repeat_type == OP_EXACT)10222{10223count_match(common);10224OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);10225JUMPTO(SLJIT_NOT_ZERO, rmax_label);10226}10227else if (repeat_type == OP_UPTO)10228{10229/* We need to preserve the counter. */10230OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);10231allocate_stack(common, 1);10232OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);10233}1023410235if (bra == OP_BRAZERO)10236BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();1023710238if (bra == OP_BRAMINZERO)10239{10240/* This is a backtrack path! 
(From the viewpoint of OP_BRAMINZERO) */10241JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);10242if (braminzero != NULL)10243{10244JUMPHERE(braminzero);10245/* We need to release the end pointer to perform the10246backtrack for the zero-length iteration. When10247framesize is < 0, OP_ONCE will do the release itself. */10248if (opcode == OP_ONCE)10249{10250int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;1025110252SLJIT_ASSERT(framesize != 0);10253if (framesize > 0)10254{10255OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10256add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));10257OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));10258}10259}10260else if (ket == OP_KETRMIN)10261free_stack(common, 1);10262}10263/* Continue to the normal backtrack. */10264}1026510266if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO || (has_alternatives && repeat_type != OP_EXACT))10267count_match(common);1026810269cc += 1 + LINK_SIZE;1027010271if (opcode == OP_ONCE)10272{10273int data;10274int framesize = BACKTRACK_AS(bracket_backtrack)->u.framesize;1027510276SLJIT_ASSERT(SHRT_MIN <= framesize && framesize < SHRT_MAX/2);10277/* We temporarily encode the needs_control_head in the lowest bit.10278The real value should be short enough for this operation to work10279without triggering Undefined Behaviour. */10280data = (int)((short)((unsigned short)framesize << 1) | (needs_control_head ? 
1 : 0));10281BACKTRACK_AS(bracket_backtrack)->u.framesize = data;10282}10283return cc + repeat_length;10284}1028510286static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)10287{10288DEFINE_COMPILER;10289backtrack_common *backtrack;10290PCRE2_UCHAR opcode;10291int private_data_ptr;10292int cbraprivptr = 0;10293BOOL needs_control_head;10294int framesize;10295int stacksize;10296int offset = 0;10297BOOL zero = FALSE;10298PCRE2_SPTR ccbegin = NULL;10299int stack; /* Also contains the offset of control head. */10300struct sljit_label *loop = NULL;10301struct jump_list *emptymatch = NULL;1030210303PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);10304if (*cc == OP_BRAPOSZERO)10305{10306zero = TRUE;10307cc++;10308}1030910310opcode = *cc;10311private_data_ptr = PRIVATE_DATA(cc);10312SLJIT_ASSERT(private_data_ptr != 0);10313BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;10314switch(opcode)10315{10316case OP_BRAPOS:10317case OP_SBRAPOS:10318ccbegin = cc + 1 + LINK_SIZE;10319break;1032010321case OP_CBRAPOS:10322case OP_SCBRAPOS:10323offset = GET2(cc, 1 + LINK_SIZE);10324/* This case cannot be optimized in the same way as10325normal capturing brackets. 
*/10326SLJIT_ASSERT(!is_optimized_cbracket(common, offset));10327cbraprivptr = OVECTOR_PRIV(offset);10328offset <<= 1;10329ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;10330break;1033110332default:10333SLJIT_UNREACHABLE();10334break;10335}1033610337framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);10338BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;10339if (framesize < 0)10340{10341if (offset != 0)10342{10343stacksize = 2;10344if (common->capture_last_ptr != 0)10345stacksize++;10346}10347else10348stacksize = 1;1034910350if (needs_control_head)10351stacksize++;10352if (!zero)10353stacksize++;1035410355BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;10356allocate_stack(common, stacksize);10357if (framesize == no_frame)10358OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);1035910360stack = 0;10361if (offset != 0)10362{10363stack = 2;10364OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));10365OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));10366OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);10367if (common->capture_last_ptr != 0)10368OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);10369OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);10370if (needs_control_head)10371OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);10372if (common->capture_last_ptr != 0)10373{10374OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);10375stack = 3;10376}10377}10378else10379{10380if (needs_control_head)10381OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);10382OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10383stack = 1;10384}1038510386if (needs_control_head)10387stack++;10388if (!zero)10389OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);10390if (needs_control_head)10391{10392stack--;10393OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 
0);10394}10395}10396else10397{10398stacksize = framesize + 1;10399if (!zero)10400stacksize++;10401if (needs_control_head)10402stacksize++;10403if (offset == 0)10404stacksize++;10405BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;1040610407allocate_stack(common, stacksize);10408OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10409if (needs_control_head)10410OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);10411OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));1041210413stack = 0;10414if (!zero)10415{10416OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);10417stack = 1;10418}10419if (needs_control_head)10420{10421OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);10422stack++;10423}10424if (offset == 0)10425{10426OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);10427stack++;10428}10429OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);10430init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);10431stack -= 1 + (offset == 0);10432}1043310434if (offset != 0)10435OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);1043610437loop = LABEL();10438while (*cc != OP_KETRPOS)10439{10440backtrack->top = NULL;10441backtrack->own_backtracks = NULL;10442cc += GET(cc, 1);1044310444compile_matchingpath(common, ccbegin, cc, backtrack);10445if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))10446return NULL;1044710448if (framesize < 0)10449{10450if (framesize == no_frame)10451OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);1045210453if (offset != 0)10454{10455OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);10456OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);10457OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);10458if (common->capture_last_ptr != 0)10459OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 
1);10460OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);10461}10462else10463{10464if (opcode == OP_SBRAPOS)10465OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));10466OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10467}1046810469/* Even if the match is empty, we need to reset the control head. */10470if (needs_control_head)10471OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));1047210473if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)10474add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));1047510476if (!zero)10477OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);10478}10479else10480{10481if (offset != 0)10482{10483OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));10484OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);10485OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);10486OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);10487if (common->capture_last_ptr != 0)10488OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);10489OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);10490}10491else10492{10493OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10494OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));10495if (opcode == OP_SBRAPOS)10496OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));10497OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);10498}1049910500/* Even if the match is empty, we need to reset the control head. 
*/10501if (needs_control_head)10502OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));1050310504if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)10505add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));1050610507if (!zero)10508OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);10509}1051010511JUMPTO(SLJIT_JUMP, loop);10512flush_stubs(common);1051310514compile_backtrackingpath(common, backtrack->top);10515if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))10516return NULL;10517set_jumps(backtrack->own_backtracks, LABEL());1051810519if (framesize < 0)10520{10521if (offset != 0)10522OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);10523else10524OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));10525}10526else10527{10528if (offset != 0)10529{10530/* Last alternative. */10531if (*cc == OP_KETRPOS)10532OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10533OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);10534}10535else10536{10537OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);10538OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));10539}10540}1054110542if (*cc == OP_KETRPOS)10543break;10544ccbegin = cc + 1 + LINK_SIZE;10545}1054610547/* We don't have to restore the control head in case of a failed match. */1054810549backtrack->own_backtracks = NULL;10550if (!zero)10551{10552if (framesize < 0)10553add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));10554else /* TMP2 is set to [private_data_ptr] above. */10555add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));10556}1055710558/* None of them matched. 
*/10559set_jumps(emptymatch, LABEL());10560count_match(common);10561return cc + 1 + LINK_SIZE;10562}1056310564static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)10565{10566int class_len;1056710568*opcode = *cc;10569*exact = 0;1057010571if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)10572{10573cc++;10574*type = OP_CHAR;10575}10576else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)10577{10578cc++;10579*type = OP_CHARI;10580*opcode -= OP_STARI - OP_STAR;10581}10582else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)10583{10584cc++;10585*type = OP_NOT;10586*opcode -= OP_NOTSTAR - OP_STAR;10587}10588else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)10589{10590cc++;10591*type = OP_NOTI;10592*opcode -= OP_NOTSTARI - OP_STAR;10593}10594else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)10595{10596cc++;10597*opcode -= OP_TYPESTAR - OP_STAR;10598*type = OP_END;10599}10600else10601{10602SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS || *opcode == OP_ECLASS);10603*type = *opcode;10604class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 1);10605*opcode = cc[class_len];10606cc++;1060710608if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)10609{10610*opcode -= OP_CRSTAR - OP_STAR;10611*end = cc + class_len;1061210613if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)10614{10615*exact = 1;10616*opcode -= OP_PLUS - OP_STAR;10617}10618return cc;10619}1062010621if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)10622{10623*opcode -= OP_CRPOSSTAR - OP_POSSTAR;10624*end = cc + class_len;1062510626if (*opcode == OP_POSPLUS)10627{10628*exact = 1;10629*opcode = OP_POSSTAR;10630}10631return cc;10632}1063310634SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);10635*max = GET2(cc, (class_len + IMM2_SIZE));10636*exact = GET2(cc, class_len);10637*end = cc + class_len + 2 * IMM2_SIZE;1063810639if (*max == 0)10640{10641SLJIT_ASSERT(*exact > 1);10642if (*opcode == OP_CRRANGE)10643*opcode = OP_UPTO;10644else if (*opcode == OP_CRPOSRANGE)10645*opcode = OP_POSUPTO;10646else10647*opcode = OP_MINSTAR;10648return cc;10649}1065010651*max -= *exact;10652if (*max == 0)10653*opcode = OP_EXACT;10654else10655{10656SLJIT_ASSERT(*exact > 0 || *max > 1);10657if (*opcode == OP_CRRANGE)10658*opcode = OP_UPTO;10659else if (*opcode == OP_CRPOSRANGE)10660*opcode = OP_POSUPTO;10661else if (*max == 1)10662*opcode = OP_MINQUERY;10663else10664*opcode = OP_MINUPTO;10665}10666return cc;10667}1066810669switch(*opcode)10670{10671case OP_EXACT:10672*exact = GET2(cc, 0);10673cc += IMM2_SIZE;10674break;1067510676case OP_PLUS:10677case OP_MINPLUS:10678*exact = 1;10679*opcode -= OP_PLUS - OP_STAR;10680break;1068110682case OP_POSPLUS:10683*exact = 1;10684*opcode = OP_POSSTAR;10685break;1068610687case OP_UPTO:10688case OP_MINUPTO:10689case OP_POSUPTO:10690*max = GET2(cc, 0);10691cc += IMM2_SIZE;10692break;10693}1069410695if (*type == OP_END)10696{10697*type = *cc;10698*end = next_opcode(common, cc);10699cc++;10700return cc;10701}1070210703*end = cc + 
1;10704#ifdef SUPPORT_UNICODE10705if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);10706#endif10707return cc;10708}1070910710static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent, jump_list **prev_backtracks)10711{10712DEFINE_COMPILER;10713backtrack_common *backtrack = NULL;10714PCRE2_SPTR begin = cc;10715PCRE2_UCHAR opcode;10716PCRE2_UCHAR type;10717sljit_u32 max = 0, exact;10718sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);10719sljit_s32 early_fail_type;10720BOOL charpos_enabled, use_tmp;10721PCRE2_UCHAR charpos_char;10722unsigned int charpos_othercasebit;10723PCRE2_SPTR end;10724jump_list *no_match = NULL;10725jump_list *no_char1_match = NULL;10726struct sljit_jump *jump = NULL;10727struct sljit_label *label;10728int private_data_ptr = PRIVATE_DATA(cc);10729int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);10730int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;10731int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);10732int tmp_base, tmp_offset;1073310734early_fail_type = (early_fail_ptr & 0x7);10735early_fail_ptr >>= 3;1073610737/* During recursion, these optimizations are disabled. 
*/10738if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)10739{10740early_fail_ptr = 0;10741early_fail_type = type_skip;10742}1074310744SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 010745|| (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));1074610747if (early_fail_type == type_fail)10748add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));1074910750cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);1075110752if (type != OP_EXTUNI)10753{10754tmp_base = TMP3;10755tmp_offset = 0;10756}10757else10758{10759tmp_base = SLJIT_MEM1(SLJIT_SP);10760tmp_offset = LOCAL2;10761}1076210763if (opcode == OP_EXACT)10764{10765SLJIT_ASSERT(early_fail_ptr == 0 && exact >= 2);1076610767if (common->mode == PCRE2_JIT_COMPLETE10768#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3210769&& !common->utf10770#endif10771&& type != OP_ANYNL && type != OP_EXTUNI)10772{10773OP2(SLJIT_SUB, TMP1, 0, STR_END, 0, STR_PTR, 0);10774add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, IN_UCHARS(exact)));1077510776#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 3210777if (type == OP_ALLANY && !common->invalid_utf)10778#else10779if (type == OP_ALLANY)10780#endif10781OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));10782else10783{10784OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);10785label = LABEL();10786compile_char1_matchingpath(common, type, cc, prev_backtracks, FALSE);10787OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);10788JUMPTO(SLJIT_NOT_ZERO, label);10789}10790}10791else10792{10793SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));10794OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);10795label = LABEL();10796compile_char1_matchingpath(common, type, cc, prev_backtracks, 
TRUE);10797OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);10798JUMPTO(SLJIT_NOT_ZERO, label);10799}10800}1080110802if (early_fail_type == type_fail_range)10803{10804/* Range end first, followed by range start. */10805OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);10806OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));10807OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);10808OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);10809add_jump(compiler, prev_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));1081010811OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);10812OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);10813}1081410815if (opcode < OP_EXACT)10816PUSH_BACKTRACK(sizeof(char_iterator_backtrack), begin, NULL);1081710818switch(opcode)10819{10820case OP_STAR:10821case OP_UPTO:10822SLJIT_ASSERT(backtrack != NULL && (early_fail_ptr == 0 || opcode == OP_STAR));10823max += exact;1082410825if (type == OP_EXTUNI)10826{10827SLJIT_ASSERT(private_data_ptr == 0);10828SLJIT_ASSERT(early_fail_ptr == 0);1082910830if (exact == 1)10831{10832SLJIT_ASSERT(opcode == OP_STAR);10833allocate_stack(common, 1);10834OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);10835}10836else10837{10838/* If OP_EXTUNI is present, it has a separate EXACT opcode. 
*/10839SLJIT_ASSERT(exact == 0);1084010841allocate_stack(common, 2);10842OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10843OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);10844}1084510846if (opcode == OP_UPTO)10847{10848SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));10849OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, SLJIT_IMM, max);10850}1085110852label = LABEL();10853compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);10854if (opcode == OP_UPTO)10855{10856OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);10857OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);10858jump = JUMP(SLJIT_ZERO);10859OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, TMP1, 0);10860}1086110862/* We cannot use TMP3 because of allocate_stack. */10863allocate_stack(common, 1);10864OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);10865JUMPTO(SLJIT_JUMP, label);10866if (jump != NULL)10867JUMPHERE(jump);10868BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10869break;10870}10871#ifdef SUPPORT_UNICODE10872else if (type == OP_ALLANY && !common->invalid_utf)10873#else10874else if (type == OP_ALLANY)10875#endif10876{10877if (opcode == OP_STAR)10878{10879if (exact == 1)10880detect_partial_match(common, prev_backtracks);1088110882if (private_data_ptr == 0)10883allocate_stack(common, 2);1088410885OP1(SLJIT_MOV, base, offset0, STR_END, 0);10886OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);1088710888OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);10889process_partial_match(common);1089010891if (early_fail_ptr != 0)10892OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);10893BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10894break;10895}10896#ifdef SUPPORT_UNICODE10897else if (!common->utf)10898#else10899else10900#endif10901{10902/* If OP_ALLANY is present, it has a separate EXACT opcode. 
*/10903SLJIT_ASSERT(exact == 0);1090410905if (private_data_ptr == 0)10906allocate_stack(common, 2);1090710908OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);10909OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));1091010911if (common->mode == PCRE2_JIT_COMPLETE)10912{10913OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);10914SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);10915}10916else10917{10918jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);10919process_partial_match(common);10920JUMPHERE(jump);10921}1092210923OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);1092410925if (early_fail_ptr != 0)10926OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);10927BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();10928break;10929}10930}1093110932charpos_enabled = FALSE;10933charpos_char = 0;10934charpos_othercasebit = 0;1093510936SLJIT_ASSERT(tmp_base == TMP3);10937if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))10938{10939#ifdef SUPPORT_UNICODE10940charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);10941#else10942charpos_enabled = TRUE;10943#endif10944if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))10945{10946charpos_othercasebit = char_get_othercase_bit(common, end + 1);10947if (charpos_othercasebit == 0)10948charpos_enabled = FALSE;10949}1095010951if (charpos_enabled)10952{10953charpos_char = end[1];10954/* Consume the OP_CHAR opcode. 
*/10955end += 2;10956#if PCRE2_CODE_UNIT_WIDTH == 810957SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);10958#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3210959SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);10960if ((charpos_othercasebit & 0x100) != 0)10961charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;10962#endif10963if (charpos_othercasebit != 0)10964charpos_char |= charpos_othercasebit;1096510966BACKTRACK_AS(char_iterator_backtrack)->charpos.charpos_enabled = TRUE;10967BACKTRACK_AS(char_iterator_backtrack)->charpos.chr = charpos_char;10968BACKTRACK_AS(char_iterator_backtrack)->charpos.othercasebit = charpos_othercasebit;1096910970if (private_data_ptr == 0)10971allocate_stack(common, 2);1097210973use_tmp = (opcode == OP_STAR);1097410975if (use_tmp)10976{10977OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);10978OP1(SLJIT_MOV, base, offset0, TMP3, 0);10979}10980else10981{10982OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);10983OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, 0);10984OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);10985OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 0 : (max + 1));10986}1098710988/* Search the first instance of charpos_char. 
*/10989if (exact > 0)10990detect_partial_match(common, &no_match);10991else10992jump = JUMP(SLJIT_JUMP);1099310994label = LABEL();1099510996if (opcode == OP_UPTO)10997{10998if (exact == max)10999OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11000else11001{11002OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11003add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));11004}11005}1100611007compile_char1_matchingpath(common, type, cc, &no_match, FALSE);1100811009if (early_fail_ptr != 0)11010OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);1101111012if (exact == 0)11013JUMPHERE(jump);1101411015detect_partial_match(common, &no_match);1101611017if (opcode == OP_UPTO && exact > 0)11018{11019if (exact == max)11020CMPTO(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact, label);11021else11022CMPTO(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, (max + 1) - exact, label);11023}1102411025OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));11026if (charpos_othercasebit != 0)11027OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);11028CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);1102911030OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11031if (use_tmp)11032{11033OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, SLJIT_IMM, 0);11034SELECT(SLJIT_EQUAL, TMP3, STR_PTR, 0, TMP3);11035}11036else11037{11038OP2U(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, SLJIT_IMM, 0);11039SELECT(SLJIT_EQUAL, COUNT_MATCH, STR_PTR, 0, COUNT_MATCH);11040}11041JUMPTO(SLJIT_JUMP, label);1104211043set_jumps(no_match, LABEL());11044OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11045if (use_tmp)11046OP1(SLJIT_MOV, base, offset1, TMP3, 0);11047else11048{11049OP1(SLJIT_MOV, TMP1, 0, base, offset1);11050OP1(SLJIT_MOV, base, offset1, COUNT_MATCH, 0);11051OP1(SLJIT_MOV, COUNT_MATCH, 0, TMP1, 0);11052}1105311054add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));1105511056BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11057OP1(SLJIT_MOV, base, 
offset0, STR_PTR, 0);11058OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11059break;11060}11061}1106211063if (private_data_ptr == 0)11064allocate_stack(common, 2);1106511066#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211067use_tmp = (opcode == OP_STAR);1106811069if (common->utf)11070{11071if (!use_tmp)11072OP1(SLJIT_MOV, base, offset0, COUNT_MATCH, 0);1107311074OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);11075}11076#endif1107711078if (opcode == OP_UPTO)11079OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? -(sljit_sw)exact : (sljit_sw)max);1108011081if (opcode == OP_UPTO && exact > 0)11082{11083label = LABEL();11084detect_partial_match(common, &no_match);11085compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);11086#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211087if (common->utf)11088OP1(SLJIT_MOV, use_tmp ? TMP3 : COUNT_MATCH, 0, STR_PTR, 0);11089#endif1109011091if (exact == max)11092{11093OP2(SLJIT_ADD | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11094JUMPTO(SLJIT_NOT_ZERO, label);11095}11096else11097{11098OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11099add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));11100CMPTO(SLJIT_NOT_EQUAL, TMP3, 0, SLJIT_IMM, max - exact, label);11101}1110211103OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);11104JUMPTO(SLJIT_JUMP, label);11105}11106else11107{11108OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);1110911110detect_partial_match(common, &no_match);11111label = LABEL();11112compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);11113#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211114if (common->utf)11115OP1(SLJIT_MOV, use_tmp ? 
TMP3 : COUNT_MATCH, 0, STR_PTR, 0);11116#endif1111711118if (opcode == OP_UPTO)11119{11120OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11121add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));11122}1112311124detect_partial_match_to(common, label);11125}1112611127#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211128if (common->utf)11129{11130set_jumps(no_char1_match, LABEL());11131set_jumps(no_match, LABEL());11132if (use_tmp)11133{11134OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);11135OP1(SLJIT_MOV, base, offset0, TMP3, 0);11136}11137else11138{11139OP1(SLJIT_MOV, STR_PTR, 0, COUNT_MATCH, 0);11140OP1(SLJIT_MOV, COUNT_MATCH, 0, base, offset0);11141OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11142}11143}11144else11145#endif11146{11147if (opcode != OP_UPTO || exact == 0)11148OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11149set_jumps(no_char1_match, LABEL());1115011151OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11152set_jumps(no_match, LABEL());11153OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11154}1115511156if (opcode == OP_UPTO)11157{11158if (exact > 0)11159{11160if (max == exact)11161jump = CMP(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, -(sljit_sw)exact);11162else11163jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);1116411165add_jump(compiler, &backtrack->own_backtracks, jump);11166}11167}11168else if (exact == 1)11169add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, base, offset1, STR_PTR, 0));1117011171if (early_fail_ptr != 0)11172OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);1117311174BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11175break;1117611177case OP_QUERY:11178SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);11179if (private_data_ptr == 0)11180allocate_stack(common, 1);11181OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11182compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, 
TRUE);11183BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11184break;1118511186case OP_MINSTAR:11187case OP_MINQUERY:11188SLJIT_ASSERT(backtrack != NULL && (opcode == OP_MINSTAR || early_fail_ptr == 0));11189if (private_data_ptr == 0)11190allocate_stack(common, 1);1119111192if (exact >= 1)11193{11194if (exact >= 2)11195{11196/* Extuni has a separate exact opcode. */11197SLJIT_ASSERT(tmp_base == TMP3 && early_fail_ptr == 0);11198OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);11199}1120011201if (opcode == OP_MINQUERY)11202OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, -1);1120311204label = LABEL();11205BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;1120611207compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);1120811209if (exact >= 2)11210{11211OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11212JUMPTO(SLJIT_NOT_ZERO, label);11213}1121411215if (opcode == OP_MINQUERY)11216OP2(SLJIT_AND, base, offset0, base, offset0, STR_PTR, 0);11217else11218OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11219}11220else11221{11222OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11223BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11224}1122511226if (early_fail_ptr != 0)11227OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11228break;1122911230case OP_MINUPTO:11231SLJIT_ASSERT(backtrack != NULL && early_fail_ptr == 0);11232if (private_data_ptr == 0)11233allocate_stack(common, 2);1123411235OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);1123611237if (exact == 0)11238{11239OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11240BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();11241break;11242}1124311244if (exact >= 2)11245{11246/* Extuni has a separate exact opcode. 
*/11247SLJIT_ASSERT(tmp_base == TMP3);11248OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact);11249}1125011251label = LABEL();11252BACKTRACK_AS(char_iterator_backtrack)->matchingpath = label;1125311254compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);1125511256if (exact >= 2)11257{11258OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11259JUMPTO(SLJIT_NOT_ZERO, label);11260}1126111262OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);11263break;1126411265case OP_EXACT:11266SLJIT_ASSERT(backtrack == NULL);11267break;1126811269case OP_POSSTAR:11270SLJIT_ASSERT(backtrack == NULL);11271#if defined SUPPORT_UNICODE11272if (type == OP_ALLANY && !common->invalid_utf)11273#else11274if (type == OP_ALLANY)11275#endif11276{11277if (exact == 1)11278detect_partial_match(common, prev_backtracks);1127911280OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);11281process_partial_match(common);11282if (early_fail_ptr != 0)11283OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);11284break;11285}1128611287#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211288if (common->utf)11289{11290SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));1129111292if (tmp_base != TMP3)11293{11294OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);11295tmp_base = COUNT_MATCH;11296}1129711298OP1(SLJIT_MOV, tmp_base, 0, exact == 1 ? 
SLJIT_IMM : STR_PTR, 0);11299detect_partial_match(common, &no_match);11300label = LABEL();11301compile_char1_matchingpath(common, type, cc, &no_match, FALSE);11302OP1(SLJIT_MOV, tmp_base, 0, STR_PTR, 0);11303detect_partial_match_to(common, label);1130411305set_jumps(no_match, LABEL());11306OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, 0);1130711308if (tmp_base != TMP3)11309OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);1131011311if (exact == 1)11312add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));1131311314if (early_fail_ptr != 0)11315OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11316break;11317}11318#endif1131911320if (exact == 1)11321OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);1132211323detect_partial_match(common, &no_match);11324label = LABEL();11325/* Extuni never fails, so no_char1_match is not used in that case.11326Anynl optionally reads an extra character on success. */11327compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);11328detect_partial_match_to(common, label);11329if (type != OP_EXTUNI)11330OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1133111332set_jumps(no_char1_match, LABEL());11333if (type != OP_EXTUNI)11334OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1133511336set_jumps(no_match, LABEL());1133711338if (exact == 1)11339add_jump(compiler, prev_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));1134011341if (early_fail_ptr != 0)11342OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);11343break;1134411345case OP_POSUPTO:11346SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);11347max += exact;1134811349#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 3211350if (type == OP_EXTUNI || common->utf)11351#else11352if (type == OP_EXTUNI)11353#endif11354{11355SLJIT_ASSERT(common->locals_size >= 3 * SSIZE_OF(sw));1135611357/* Count match is not modified by compile_char1_matchingpath. 
*/11358OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL2, COUNT_MATCH, 0);11359OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_IMM, exact == max ? 0 : max);1136011361label = LABEL();11362/* Extuni only modifies TMP3 on successful match. */11363OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);11364compile_char1_matchingpath(common, type, cc, &no_match, TRUE);1136511366if (exact == max)11367{11368OP2(SLJIT_ADD, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);11369JUMPTO(SLJIT_JUMP, label);11370}11371else11372{11373OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);11374JUMPTO(SLJIT_NOT_ZERO, label);11375OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);11376}1137711378set_jumps(no_match, LABEL());1137911380if (exact > 0)11381{11382if (exact == max)11383OP2U(SLJIT_SUB | SLJIT_SET_LESS, COUNT_MATCH, 0, SLJIT_IMM, exact);11384else11385OP2U(SLJIT_SUB | SLJIT_SET_GREATER, COUNT_MATCH, 0, SLJIT_IMM, max - exact);11386}1138711388OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LOCAL2);1138911390if (exact > 0)11391add_jump(compiler, prev_backtracks, JUMP(exact == max ? SLJIT_LESS : SLJIT_GREATER));11392OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);11393break;11394}1139511396SLJIT_ASSERT(tmp_base == TMP3);1139711398OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, exact == max ? 
0 : max);1139911400detect_partial_match(common, &no_match);11401label = LABEL();11402compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);1140311404if (exact == max)11405OP2(SLJIT_ADD, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11406else11407{11408OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);11409add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));11410}11411detect_partial_match_to(common, label);11412OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));1141311414set_jumps(no_char1_match, LABEL());11415OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11416set_jumps(no_match, LABEL());1141711418if (exact > 0)11419{11420if (exact == max)11421jump = CMP(SLJIT_LESS, TMP3, 0, SLJIT_IMM, exact);11422else11423jump = CMP(SLJIT_GREATER, TMP3, 0, SLJIT_IMM, max - exact);1142411425add_jump(compiler, prev_backtracks, jump);11426}11427break;1142811429case OP_POSQUERY:11430SLJIT_ASSERT(backtrack == NULL && early_fail_ptr == 0);11431SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));11432OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);11433compile_char1_matchingpath(common, type, cc, &no_match, TRUE);11434OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);11435set_jumps(no_match, LABEL());11436OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);11437break;1143811439default:11440SLJIT_UNREACHABLE();11441break;11442}1144311444count_match(common);11445return end;11446}1144711448static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)11449{11450DEFINE_COMPILER;11451backtrack_common *backtrack;1145211453PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);1145411455if (*cc == OP_FAIL)11456{11457add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));11458return cc + 1;11459}1146011461if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)11462add_jump(compiler, &common->restart_match, 
CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));1146311464if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)11465{11466/* No need to check notempty conditions. */11467if (common->accept_label == NULL)11468add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));11469else11470JUMPTO(SLJIT_JUMP, common->accept_label);11471return cc + 1;11472}1147311474if (common->accept_label == NULL)11475add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));11476else11477CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);1147811479if (HAS_VIRTUAL_REGISTERS)11480{11481OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11482OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));11483}11484else11485OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));1148611487OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);11488add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));11489OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);11490if (common->accept_label == NULL)11491add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));11492else11493JUMPTO(SLJIT_ZERO, common->accept_label);1149411495OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));11496if (common->accept_label == NULL)11497add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));11498else11499CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);11500add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));11501return cc + 1;11502}1150311504static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)11505{11506DEFINE_COMPILER;11507int offset = GET2(cc, 1);11508BOOL optimized_cbracket = is_optimized_cbracket(common, offset);1150911510/* Data will be discarded anyway... 
*/11511if (common->currententry != NULL)11512return cc + 1 + IMM2_SIZE;1151311514if (!optimized_cbracket)11515OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));11516offset <<= 1;11517OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);11518if (!optimized_cbracket)11519OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);11520return cc + 1 + IMM2_SIZE;11521}1152211523static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)11524{11525DEFINE_COMPILER;11526backtrack_common *backtrack;11527PCRE2_UCHAR opcode = *cc;11528PCRE2_SPTR ccend = cc + 1;1152911530if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||11531opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)11532ccend += 2 + cc[1];1153311534PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);1153511536if (opcode == OP_SKIP)11537{11538allocate_stack(common, 1);11539OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);11540return ccend;11541}1154211543if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)11544{11545if (HAS_VIRTUAL_REGISTERS)11546OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11547OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));11548OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);11549OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? 
TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);11550}1155111552return ccend;11553}1155411555static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };1155611557static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)11558{11559DEFINE_COMPILER;11560backtrack_common *backtrack;11561BOOL needs_control_head;11562int size;1156311564PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);11565common->then_trap = BACKTRACK_AS(then_trap_backtrack);11566BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;11567BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);11568BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);1156911570size = BACKTRACK_AS(then_trap_backtrack)->framesize;11571size = 3 + (size < 0 ? 0 : size);1157211573OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);11574allocate_stack(common, size);11575if (size > 3)11576OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));11577else11578OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);11579OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);11580OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);11581OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);1158211583size = BACKTRACK_AS(then_trap_backtrack)->framesize;11584if (size >= 0)11585init_frame(common, cc, ccend, size - 1, 0);11586}1158711588static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)11589{11590DEFINE_COMPILER;11591backtrack_common *backtrack;11592BOOL has_then_trap = FALSE;11593then_trap_backtrack *save_then_trap = NULL;11594size_t op_len;1159511596SLJIT_ASSERT(*ccend == OP_END || (*ccend 
>= OP_ALT && *ccend <= OP_KETRPOS));1159711598if (common->has_then && common->then_offsets[cc - common->start] != 0)11599{11600SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);11601has_then_trap = TRUE;11602save_then_trap = common->then_trap;11603/* Tail item on backtrack. */11604compile_then_trap_matchingpath(common, cc, ccend, parent);11605}1160611607while (cc < ccend)11608{11609switch(*cc)11610{11611case OP_SOD:11612case OP_SOM:11613case OP_NOT_WORD_BOUNDARY:11614case OP_WORD_BOUNDARY:11615case OP_EODN:11616case OP_EOD:11617case OP_DOLL:11618case OP_DOLLM:11619case OP_CIRC:11620case OP_CIRCM:11621case OP_NOT_UCP_WORD_BOUNDARY:11622case OP_UCP_WORD_BOUNDARY:11623cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11624break;1162511626case OP_NOT_DIGIT:11627case OP_DIGIT:11628case OP_NOT_WHITESPACE:11629case OP_WHITESPACE:11630case OP_NOT_WORDCHAR:11631case OP_WORDCHAR:11632case OP_ANY:11633case OP_ALLANY:11634case OP_ANYBYTE:11635case OP_NOTPROP:11636case OP_PROP:11637case OP_ANYNL:11638case OP_NOT_HSPACE:11639case OP_HSPACE:11640case OP_NOT_VSPACE:11641case OP_VSPACE:11642case OP_EXTUNI:11643case OP_NOT:11644case OP_NOTI:11645cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11646break;1164711648case OP_SET_SOM:11649PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);11650OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));11651allocate_stack(common, 1);11652OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);11653OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);11654cc++;11655break;1165611657case OP_CHAR:11658case OP_CHARI:11659if (common->mode == PCRE2_JIT_COMPLETE)11660cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks);11661else11662cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11663break;1166411665case OP_STAR:11666case OP_MINSTAR:11667case OP_PLUS:11668case OP_MINPLUS:11669case OP_QUERY:11670case OP_MINQUERY:11671case OP_UPTO:11672case OP_MINUPTO:11673case OP_EXACT:11674case OP_POSSTAR:11675case OP_POSPLUS:11676case OP_POSQUERY:11677case OP_POSUPTO:11678case OP_STARI:11679case OP_MINSTARI:11680case OP_PLUSI:11681case OP_MINPLUSI:11682case OP_QUERYI:11683case OP_MINQUERYI:11684case OP_UPTOI:11685case OP_MINUPTOI:11686case OP_EXACTI:11687case OP_POSSTARI:11688case OP_POSPLUSI:11689case OP_POSQUERYI:11690case OP_POSUPTOI:11691case OP_NOTSTAR:11692case OP_NOTMINSTAR:11693case OP_NOTPLUS:11694case OP_NOTMINPLUS:11695case OP_NOTQUERY:11696case OP_NOTMINQUERY:11697case OP_NOTUPTO:11698case OP_NOTMINUPTO:11699case OP_NOTEXACT:11700case OP_NOTPOSSTAR:11701case OP_NOTPOSPLUS:11702case OP_NOTPOSQUERY:11703case OP_NOTPOSUPTO:11704case OP_NOTSTARI:11705case OP_NOTMINSTARI:11706case OP_NOTPLUSI:11707case OP_NOTMINPLUSI:11708case OP_NOTQUERYI:11709case OP_NOTMINQUERYI:11710case OP_NOTUPTOI:11711case OP_NOTMINUPTOI:11712case OP_NOTEXACTI:11713case OP_NOTPOSSTARI:11714case OP_NOTPOSPLUSI:11715case OP_NOTPOSQUERYI:11716case OP_NOTPOSUPTOI:11717case OP_TYPESTAR:11718case OP_TYPEMINSTAR:11719case OP_TYPEPLUS:11720case OP_TYPEMINPLUS:11721case OP_TYPEQUERY:11722case OP_TYPEMINQUERY:11723case OP_TYPEUPTO:11724case OP_TYPEMINUPTO:11725case OP_TYPEEXACT:11726case OP_TYPEPOSSTAR:11727case OP_TYPEPOSPLUS:11728case OP_TYPEPOSQUERY:11729case OP_TYPEPOSUPTO:11730cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks);11731break;1173211733case OP_CLASS:11734case OP_NCLASS:11735if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)11736cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11737else11738cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11739break;1174011741#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3211742case OP_XCLASS:11743case OP_ECLASS:11744op_len = GET(cc, 1);11745if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11746cc = compile_iterator_matchingpath(common, cc, parent, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11747else11748cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);11749break;11750#endif1175111752case OP_REF:11753case OP_REFI:11754op_len = PRIV(OP_lengths)[*cc];11755if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11756cc = compile_ref_iterator_matchingpath(common, cc, parent);11757else11758{11759compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);11760cc += op_len;11761}11762break;1176311764case OP_DNREF:11765case OP_DNREFI:11766op_len = PRIV(OP_lengths)[*cc];11767if (cc[op_len] >= OP_CRSTAR && cc[op_len] <= OP_CRPOSRANGE)11768cc = compile_ref_iterator_matchingpath(common, cc, parent);11769else11770{11771compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);11772compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);11773cc += op_len;11774}11775break;1177611777case OP_RECURSE:11778cc = compile_recurse_matchingpath(common, cc, parent);11779break;1178011781case OP_CALLOUT:11782case OP_CALLOUT_STR:11783cc = compile_callout_matchingpath(common, cc, parent);11784break;1178511786case OP_ASSERT:11787case OP_ASSERT_NOT:11788case OP_ASSERTBACK:11789case OP_ASSERTBACK_NOT:11790PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);11791cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);11792break;1179311794case OP_BRAMINZERO:11795PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);11796cc = bracketend(cc + 1);11797if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)11798{11799allocate_stack(common, 1);11800OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);11801}11802else11803{11804allocate_stack(common, 2);11805OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);11806OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);11807}11808BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();11809count_match(common);11810break;1181111812case OP_ASSERT_NA:11813case OP_ASSERTBACK_NA:11814case OP_ASSERT_SCS:11815case OP_ONCE:11816case OP_SCRIPT_RUN:11817case OP_BRA:11818case OP_CBRA:11819case OP_COND:11820case OP_SBRA:11821case OP_SCBRA:11822case OP_SCOND:11823cc = compile_bracket_matchingpath(common, cc, parent);11824break;1182511826case OP_BRAZERO:11827if (cc[1] > OP_ASSERTBACK_NOT)11828cc = compile_bracket_matchingpath(common, cc, parent);11829else11830{11831PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);11832cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);11833}11834break;1183511836case OP_BRAPOS:11837case OP_CBRAPOS:11838case OP_SBRAPOS:11839case OP_SCBRAPOS:11840case OP_BRAPOSZERO:11841cc = compile_bracketpos_matchingpath(common, cc, parent);11842break;1184311844case 
OP_MARK:11845PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);11846SLJIT_ASSERT(common->mark_ptr != 0);11847OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);11848allocate_stack(common, common->has_skip_arg ? 5 : 1);11849if (HAS_VIRTUAL_REGISTERS)11850OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);11851OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);11852OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));11853OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);11854OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);11855if (common->has_skip_arg)11856{11857OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);11858OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);11859OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);11860OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));11861OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);11862OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);11863}11864cc += 1 + 2 + cc[1];11865break;1186611867case OP_PRUNE:11868case OP_PRUNE_ARG:11869case OP_SKIP:11870case OP_SKIP_ARG:11871case OP_THEN:11872case OP_THEN_ARG:11873case OP_COMMIT:11874case OP_COMMIT_ARG:11875cc = compile_control_verb_matchingpath(common, cc, parent);11876break;1187711878case OP_FAIL:11879case OP_ACCEPT:11880case OP_ASSERT_ACCEPT:11881cc = compile_fail_accept_matchingpath(common, cc, parent);11882break;1188311884case OP_CLOSE:11885cc = compile_close_matchingpath(common, cc);11886break;1188711888case OP_SKIPZERO:11889cc = bracketend(cc + 1);11890break;1189111892default:11893SLJIT_UNREACHABLE();11894return;11895}11896if (cc == NULL)11897return;11898}1189911900if (has_then_trap)11901{11902/* Head item on backtrack. 
*/11903PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);11904BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;11905BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;11906common->then_trap = save_then_trap;11907}11908SLJIT_ASSERT(cc == ccend);11909}1191011911#undef PUSH_BACKTRACK11912#undef PUSH_BACKTRACK_NOVALUE11913#undef BACKTRACK_AS1191411915#define COMPILE_BACKTRACKINGPATH(current) \11916do \11917{ \11918compile_backtrackingpath(common, (current)); \11919if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \11920return; \11921} \11922while (0)1192311924#define CURRENT_AS(type) ((type *)current)1192511926static void compile_newline_move_back(compiler_common *common)11927{11928DEFINE_COMPILER;11929struct sljit_jump *jump;1193011931OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));11932jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, TMP2, 0);11933/* All newlines are single byte, or their last byte11934is not equal to CHAR_NL/CHAR_CR even if UTF is enabled. 
*/11935OP1(MOV_UCHAR, SLJIT_TMP_DEST_REG, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));11936OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));11937OP2(SLJIT_SHL, SLJIT_TMP_DEST_REG, 0, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 8);11938OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_TMP_DEST_REG, 0);11939OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_CR << 8 | CHAR_NL);11940OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);11941#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 3211942OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);11943#endif11944OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);11945JUMPHERE(jump);11946}1194711948static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)11949{11950DEFINE_COMPILER;11951PCRE2_SPTR cc = current->cc;11952PCRE2_UCHAR opcode;11953PCRE2_UCHAR type;11954sljit_u32 max = 0, exact;11955struct sljit_label *label = NULL;11956struct sljit_jump *jump = NULL;11957jump_list *jumplist = NULL;11958PCRE2_SPTR end;11959int private_data_ptr = PRIVATE_DATA(cc);11960int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);11961int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;11962int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + SSIZE_OF(sw);1196311964cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);1196511966switch(opcode)11967{11968case OP_STAR:11969case OP_UPTO:11970if (type == OP_EXTUNI)11971{11972SLJIT_ASSERT(private_data_ptr == 0);11973set_jumps(current->own_backtracks, LABEL());11974OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));11975free_stack(common, 1);11976CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);11977}11978else11979{11980if (CURRENT_AS(char_iterator_backtrack)->charpos.charpos_enabled)11981{11982OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);11983OP1(SLJIT_MOV, TMP2, 0, base, offset1);1198411985jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);11986label = LABEL();11987if (type == OP_ANYNL)11988compile_newline_move_back(common);11989move_back(common, NULL, TRUE);1199011991OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));11992if (CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit != 0)11993OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.othercasebit);11994CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);11995/* The range beginning must match, no need to compare. 
*/11996JUMPTO(SLJIT_JUMP, label);1199711998set_jumps(current->own_backtracks, LABEL());11999current->own_backtracks = NULL;12000}12001else12002{12003OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);1200412005if (opcode == OP_STAR && exact == 1)12006{12007if (type == OP_ANYNL)12008{12009OP1(SLJIT_MOV, TMP2, 0, base, offset1);12010compile_newline_move_back(common);12011}1201212013move_back(common, NULL, TRUE);12014jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);12015}12016else12017{12018if (type == OP_ANYNL)12019{12020OP1(SLJIT_MOV, TMP2, 0, base, offset1);12021jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);12022compile_newline_move_back(common);12023}12024else12025jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);1202612027move_back(common, NULL, TRUE);12028}1202912030OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);12031JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);1203212033set_jumps(current->own_backtracks, LABEL());12034}1203512036JUMPHERE(jump);12037if (private_data_ptr == 0)12038free_stack(common, 2);12039}12040break;1204112042case OP_QUERY:12043OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);12044OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);12045CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);12046jump = JUMP(SLJIT_JUMP);12047set_jumps(current->own_backtracks, LABEL());12048OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);12049OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);12050JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);12051JUMPHERE(jump);12052if (private_data_ptr == 0)12053free_stack(common, 1);12054break;1205512056case OP_MINSTAR:12057OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);12058if (exact == 0)12059{12060compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);12061OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);12062}12063else if (exact > 1)12064OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);1206512066JUMPTO(SLJIT_JUMP, 
CURRENT_AS(char_iterator_backtrack)->matchingpath);12067set_jumps(exact > 0 ? current->own_backtracks : jumplist, LABEL());12068if (private_data_ptr == 0)12069free_stack(common, 1);12070break;1207112072case OP_MINUPTO:12073OP1(SLJIT_MOV, TMP1, 0, base, offset1);12074OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);12075OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);1207612077if (exact == 0)12078{12079add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));1208012081OP1(SLJIT_MOV, base, offset1, TMP1, 0);12082compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);12083OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);12084JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);1208512086set_jumps(jumplist, LABEL());12087}12088else12089{12090if (exact > 1)12091OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);12092OP1(SLJIT_MOV, base, offset1, TMP1, 0);12093JUMPTO(SLJIT_NOT_ZERO, CURRENT_AS(char_iterator_backtrack)->matchingpath);1209412095set_jumps(current->own_backtracks, LABEL());12096}1209712098if (private_data_ptr == 0)12099free_stack(common, 2);12100break;1210112102case OP_MINQUERY:12103OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);12104OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);1210512106if (exact >= 1)12107{12108if (exact >= 2)12109OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);12110CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);12111set_jumps(current->own_backtracks, LABEL());12112}12113else12114{12115jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);12116compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);12117JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);12118set_jumps(jumplist, LABEL());12119JUMPHERE(jump);12120}1212112122if (private_data_ptr == 0)12123free_stack(common, 1);12124break;1212512126default:12127SLJIT_UNREACHABLE();12128break;12129}12130}1213112132static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common 
*current)12133{12134DEFINE_COMPILER;12135PCRE2_SPTR cc = current->cc;12136BOOL ref = (*cc == OP_REF || *cc == OP_REFI);12137PCRE2_UCHAR type;1213812139type = cc[PRIV(OP_lengths)[*cc]];1214012141if ((type & 0x1) == 0)12142{12143/* Maximize case. */12144set_jumps(current->own_backtracks, LABEL());12145OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12146free_stack(common, 1);12147CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);12148return;12149}1215012151OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12152CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);12153set_jumps(current->own_backtracks, LABEL());12154free_stack(common, ref ? 2 : 3);12155}1215612157static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)12158{12159DEFINE_COMPILER;12160recurse_entry *entry;1216112162if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)12163{12164entry = CURRENT_AS(recurse_backtrack)->entry;12165if (entry->backtrack_label == NULL)12166add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));12167else12168JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);12169CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);12170}12171else12172compile_backtrackingpath(common, current->top);1217312174set_jumps(current->own_backtracks, LABEL());12175}1217612177static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)12178{12179DEFINE_COMPILER;12180PCRE2_SPTR cc = current->cc;12181PCRE2_UCHAR bra = OP_BRA;12182struct sljit_jump *brajump = NULL;1218312184SLJIT_ASSERT(*cc != OP_BRAMINZERO);12185if (*cc == OP_BRAZERO)12186{12187bra = *cc;12188cc++;12189}1219012191if (bra == OP_BRAZERO)12192{12193SLJIT_ASSERT(current->own_backtracks == NULL);12194OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12195}1219612197if 
(CURRENT_AS(assert_backtrack)->framesize < 0)12198{12199set_jumps(current->own_backtracks, LABEL());1220012201if (bra == OP_BRAZERO)12202{12203OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12204CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);12205free_stack(common, 1);12206}12207return;12208}1220912210if (bra == OP_BRAZERO)12211{12212if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)12213{12214OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12215CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);12216free_stack(common, 1);12217return;12218}12219free_stack(common, 1);12220brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);12221}1222212223if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)12224{12225OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);12226add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12227OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12228OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));12229OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);1223012231set_jumps(current->own_backtracks, LABEL());12232}12233else12234set_jumps(current->own_backtracks, LABEL());1223512236if (bra == OP_BRAZERO)12237{12238/* We know there is enough place on the stack. 
*/12239OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));12240OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);12241JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);12242JUMPHERE(brajump);12243}12244}1224512246static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)12247{12248DEFINE_COMPILER;12249int opcode, stacksize, alt_count, alt_max;12250int offset = 0;12251int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;12252int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;12253PCRE2_SPTR cc = current->cc;12254PCRE2_SPTR ccbegin;12255PCRE2_SPTR ccprev;12256PCRE2_UCHAR bra = OP_BRA;12257PCRE2_UCHAR ket;12258const assert_backtrack *assert;12259BOOL has_alternatives;12260BOOL needs_control_head = FALSE;12261BOOL has_vreverse;12262struct sljit_jump *brazero = NULL;12263struct sljit_jump *next_alt = NULL;12264struct sljit_jump *once = NULL;12265struct sljit_jump *cond = NULL;12266struct sljit_label *rmin_label = NULL;12267struct sljit_label *exact_label = NULL;12268struct sljit_jump *mov_addr = NULL;1226912270if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)12271{12272bra = *cc;12273cc++;12274}1227512276opcode = *cc;12277ccbegin = bracketend(cc) - 1 - LINK_SIZE;12278ket = *ccbegin;12279if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)12280{12281repeat_ptr = PRIVATE_DATA(ccbegin);12282repeat_type = PRIVATE_DATA(ccbegin + 2);12283repeat_count = PRIVATE_DATA(ccbegin + 3);12284SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);12285if (repeat_type == OP_UPTO)12286ket = OP_KETRMAX;12287if (repeat_type == OP_MINUPTO)12288ket = OP_KETRMIN;12289}12290ccbegin = cc;12291cc += GET(cc, 1);12292has_alternatives = *cc == OP_ALT;12293if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12294has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.no_capture != NULL;12295if 
(opcode == OP_CBRA || opcode == OP_SCBRA)12296offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;12297if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))12298opcode = OP_SCOND;1229912300alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;1230112302/* Decoding the needs_control_head in framesize. */12303if (opcode == OP_ONCE)12304{12305needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;12306CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;12307}1230812309if (ket != OP_KET && repeat_type != 0)12310{12311/* TMP1 is used in OP_KETRMIN below. */12312OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12313free_stack(common, 1);12314if (repeat_type == OP_UPTO)12315OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);12316else12317OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);12318}1231912320if (ket == OP_KETRMAX)12321{12322if (bra == OP_BRAZERO)12323{12324OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12325free_stack(common, 1);12326brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12327}12328}12329else if (ket == OP_KETRMIN)12330{12331if (bra != OP_BRAMINZERO)12332{12333OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12334if (repeat_type != 0)12335{12336/* TMP1 was set a few lines above. */12337CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12338/* Drop STR_PTR for non-greedy plus quantifier. */12339if (opcode != OP_ONCE)12340free_stack(common, 1);12341}12342else if (opcode >= OP_SBRA || opcode == OP_ONCE)12343{12344/* Checking zero-length iteration. 
*/12345if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)12346CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12347else12348{12349OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12350CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12351}12352/* Drop STR_PTR for non-greedy plus quantifier. */12353if (opcode != OP_ONCE)12354free_stack(common, 1);12355}12356else12357JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12358}12359rmin_label = LABEL();12360if (repeat_type != 0)12361OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12362}12363else if (bra == OP_BRAZERO)12364{12365OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12366free_stack(common, 1);12367brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12368}12369else if (repeat_type == OP_EXACT)12370{12371OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12372exact_label = LABEL();12373}1237412375if (offset != 0)12376{12377if (common->capture_last_ptr != 0)12378{12379SLJIT_ASSERT(!is_optimized_cbracket(common, offset >> 1));12380OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12381OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12382OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);12383OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12384free_stack(common, 3);12385OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);12386OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);12387}12388else if (!is_optimized_cbracket(common, offset >> 1))12389{12390OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12391OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12392free_stack(common, 2);12393OP1(SLJIT_MOV, 
SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12394OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12395}12396}12397else if (SLJIT_UNLIKELY(opcode == OP_ASSERT_SCS))12398{12399OP1(SLJIT_MOV, TMP1, 0, STR_END, 0);12400OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12401OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP1, 0);1240212403/* Nested scs blocks will not update this variable. */12404if (common->restore_end_ptr == 0)12405common->restore_end_ptr = private_data_ptr + sizeof(sljit_sw);12406}1240712408if (SLJIT_UNLIKELY(opcode == OP_ONCE))12409{12410int framesize = CURRENT_AS(bracket_backtrack)->u.framesize;1241112412SLJIT_ASSERT(framesize != 0);12413if (framesize > 0)12414{12415OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12416add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12417OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));12418}12419once = JUMP(SLJIT_JUMP);12420}12421else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12422{12423if (has_alternatives)12424{12425/* Always exactly one alternative. 
*/12426OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12427free_stack(common, 1);1242812429alt_max = 2;12430next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12431}12432}12433else if (has_alternatives)12434{12435OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12436free_stack(common, 1);1243712438if (alt_max > 3)12439{12440sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);1244112442SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->matching_mov_addr != NULL);12443sljit_set_label(CURRENT_AS(bracket_backtrack)->matching_mov_addr, LABEL());12444sljit_emit_op0(compiler, SLJIT_ENDBR);12445}12446else12447next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);12448}1244912450COMPILE_BACKTRACKINGPATH(current->top);12451if (current->own_backtracks)12452set_jumps(current->own_backtracks, LABEL());1245312454if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))12455{12456/* Conditional block always has at most one alternative. */12457if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)12458{12459SLJIT_ASSERT(has_alternatives);12460assert = CURRENT_AS(bracket_backtrack)->u.assert;12461SLJIT_ASSERT(assert->framesize != 0);12462if (assert->framesize > 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))12463{12464OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);12465add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12466OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12467OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));12468OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);12469}12470cond = JUMP(SLJIT_JUMP);12471set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());12472}12473else if (CURRENT_AS(bracket_backtrack)->u.no_capture != NULL)12474{12475SLJIT_ASSERT(has_alternatives);12476cond = 
JUMP(SLJIT_JUMP);12477set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());12478}12479else12480SLJIT_ASSERT(!has_alternatives);12481}1248212483if (has_alternatives)12484{12485alt_count = 1;12486do12487{12488current->top = NULL;12489current->own_backtracks = NULL;12490current->simple_backtracks = NULL;12491/* Conditional blocks always have an additional alternative, even if it is empty. */12492if (*cc == OP_ALT)12493{12494ccprev = cc + 1 + LINK_SIZE;12495cc += GET(cc, 1);1249612497has_vreverse = FALSE;1249812499switch (opcode)12500{12501case OP_ASSERTBACK:12502case OP_ASSERTBACK_NA:12503SLJIT_ASSERT(private_data_ptr != 0);12504OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);1250512506has_vreverse = (*ccprev == OP_VREVERSE);12507if (*ccprev == OP_REVERSE || has_vreverse)12508ccprev = compile_reverse_matchingpath(common, ccprev, current);12509break;12510case OP_ASSERT_SCS:12511OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12512break;12513case OP_ONCE:12514OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 
1 : 0));12515break;12516case OP_COND:12517case OP_SCOND:12518break;12519default:12520if (private_data_ptr != 0)12521OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12522else12523OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12524break;12525}1252612527compile_matchingpath(common, ccprev, cc, current);12528if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))12529return;1253012531switch (opcode)12532{12533case OP_ASSERTBACK_NA:12534if (has_vreverse)12535{12536SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));12537add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));12538}1253912540if (PRIVATE_DATA(ccbegin + 1))12541OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12542break;12543case OP_ASSERT_NA:12544OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);12545break;12546case OP_SCRIPT_RUN:12547match_script_run_common(common, private_data_ptr, current);12548break;12549}12550}1255112552/* Instructions after the current alternative is successfully matched. */12553/* There is a similar code in compile_bracket_matchingpath. */12554if (opcode == OP_ONCE)12555match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);1255612557stacksize = 0;12558if (repeat_type == OP_MINUPTO)12559{12560/* We need to preserve the counter. TMP2 will be used below. */12561OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);12562stacksize++;12563}12564if (ket != OP_KET || bra != OP_BRA)12565stacksize++;12566if (offset != 0)12567{12568if (common->capture_last_ptr != 0)12569stacksize++;12570if (!is_optimized_cbracket(common, offset >> 1))12571stacksize += 2;12572}12573if (opcode != OP_ONCE)12574stacksize++;1257512576if (stacksize > 0)12577allocate_stack(common, stacksize);1257812579stacksize = 0;12580if (repeat_type == OP_MINUPTO)12581{12582/* TMP2 was set above. 
*/12583OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);12584stacksize++;12585}1258612587if (ket != OP_KET || bra != OP_BRA)12588{12589if (ket != OP_KET)12590OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);12591else12592OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);12593stacksize++;12594}1259512596if (offset != 0)12597stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);1259812599if (opcode != OP_ONCE)12600{12601if (alt_max <= 3)12602OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);12603else12604mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(stacksize));12605}1260612607if (offset != 0 && ket == OP_KETRMAX && is_optimized_cbracket(common, offset >> 1))12608{12609/* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */12610SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));12611OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);12612}1261312614JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);1261512616if (opcode != OP_ONCE)12617{12618if (alt_max <= 3)12619{12620JUMPHERE(next_alt);12621alt_count++;12622if (alt_count < alt_max)12623{12624SLJIT_ASSERT(alt_count == 2 && alt_max == 3);12625next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);12626}12627}12628else12629{12630sljit_set_label(mov_addr, LABEL());12631sljit_emit_op0(compiler, SLJIT_ENDBR);12632}12633}1263412635COMPILE_BACKTRACKINGPATH(current->top);12636if (current->own_backtracks)12637set_jumps(current->own_backtracks, LABEL());12638SLJIT_ASSERT(!current->simple_backtracks);12639}12640while (*cc == OP_ALT);1264112642if (cond != NULL)12643{12644SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);12645if (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT)12646{12647assert = 
CURRENT_AS(bracket_backtrack)->u.assert;12648SLJIT_ASSERT(assert->framesize != 0);12649if (assert->framesize > 0)12650{12651OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);12652add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12653OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));12654OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));12655OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);12656}12657}12658JUMPHERE(cond);12659}1266012661/* Free the STR_PTR. */12662if (private_data_ptr == 0)12663free_stack(common, 1);12664}1266512666if (offset != 0)12667{12668/* Using both tmp register is better for instruction scheduling. */12669if (is_optimized_cbracket(common, offset >> 1))12670{12671OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12672OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12673free_stack(common, 2);12674OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12675OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12676}12677else12678{12679OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12680free_stack(common, 1);12681OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12682}12683}12684else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))12685{12686OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12687OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12688OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12689OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12690OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);12691free_stack(common, 4);12692}12693else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)12694{12695OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 
STACK(0));12696free_stack(common, 1);12697}12698else if (opcode == OP_ASSERT_SCS)12699{12700OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12701OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12702OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));12703OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12704OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);12705free_stack(common, has_alternatives ? 3 : 2);1270612707set_jumps(CURRENT_AS(bracket_backtrack)->u.no_capture, LABEL());1270812709/* Nested scs blocks will not update this variable. */12710if (common->restore_end_ptr == private_data_ptr + SSIZE_OF(sw))12711common->restore_end_ptr = 0;12712}12713else if (opcode == OP_ONCE)12714{12715cc = ccbegin + GET(ccbegin, 1);12716stacksize = needs_control_head ? 1 : 0;1271712718if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)12719{12720/* Reset head and drop saved frame. */12721stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);12722}12723else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))12724{12725/* The STR_PTR must be released. */12726stacksize++;12727}1272812729if (stacksize > 0)12730free_stack(common, stacksize);1273112732JUMPHERE(once);12733/* Restore previous private_data_ptr */12734if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)12735OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));12736else if (ket == OP_KETRMIN)12737{12738OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12739/* See the comment below. 
*/12740free_stack(common, 2);12741OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);12742}12743}1274412745if (repeat_type == OP_EXACT)12746{12747OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);12748OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);12749CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);12750}12751else if (ket == OP_KETRMAX)12752{12753OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12754if (bra != OP_BRAZERO)12755free_stack(common, 1);1275612757CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);12758if (bra == OP_BRAZERO)12759{12760OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12761JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);12762JUMPHERE(brazero);12763free_stack(common, 1);12764}12765}12766else if (ket == OP_KETRMIN)12767{12768OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));1276912770/* OP_ONCE removes everything in case of a backtrack, so we don't12771need to explicitly release the STR_PTR. The extra release would12772affect badly the free_stack(2) above. */12773if (opcode != OP_ONCE)12774free_stack(common, 1);12775CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);12776if (opcode == OP_ONCE)12777free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);12778else if (bra == OP_BRAMINZERO)12779free_stack(common, 1);12780}12781else if (bra == OP_BRAZERO)12782{12783OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12784JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);12785JUMPHERE(brazero);12786}12787}1278812789static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)12790{12791DEFINE_COMPILER;12792int offset;12793struct sljit_jump *jump;12794PCRE2_SPTR cc;1279512796/* No retry on backtrack, just drop everything. 
*/12797if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)12798{12799cc = current->cc;1280012801if (*cc == OP_BRAPOSZERO)12802cc++;1280312804if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)12805{12806offset = (GET2(cc, 1 + LINK_SIZE)) << 1;12807OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12808OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));12809OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);12810if (common->capture_last_ptr != 0)12811OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12812OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);12813if (common->capture_last_ptr != 0)12814OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);12815}12816set_jumps(current->own_backtracks, LABEL());12817free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);12818return;12819}1282012821OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);12822add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12823OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));1282412825if (current->own_backtracks)12826{12827jump = JUMP(SLJIT_JUMP);12828set_jumps(current->own_backtracks, LABEL());12829/* Drop the stack frame. 
*/12830free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);12831JUMPHERE(jump);12832}12833OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));12834}1283512836static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)12837{12838assert_backtrack backtrack;1283912840current->top = NULL;12841current->own_backtracks = NULL;12842current->simple_backtracks = NULL;12843if (current->cc[1] > OP_ASSERTBACK_NOT)12844{12845/* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */12846compile_bracket_matchingpath(common, current->cc, current);12847if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))12848return;12849compile_bracket_backtrackingpath(common, current->top);12850}12851else12852{12853memset(&backtrack, 0, sizeof(backtrack));12854backtrack.common.cc = current->cc;12855backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;12856/* Manual call of compile_assert_matchingpath. 
*/12857compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);12858if (SLJIT_UNLIKELY(sljit_get_compiler_error(common->compiler)))12859return;12860}12861SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);12862}1286312864static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)12865{12866DEFINE_COMPILER;12867PCRE2_UCHAR opcode = *current->cc;12868struct sljit_label *loop;12869struct sljit_jump *jump;1287012871if (opcode == OP_THEN || opcode == OP_THEN_ARG)12872{12873if (common->then_trap != NULL)12874{12875SLJIT_ASSERT(common->control_head_ptr != 0);1287612877OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);12878OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);12879OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);12880jump = JUMP(SLJIT_JUMP);1288112882loop = LABEL();12883OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12884JUMPHERE(jump);12885CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);12886CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);12887add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));12888return;12889}12890else if (!common->local_quit_available && common->in_positive_assertion)12891{12892add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));12893return;12894}12895}1289612897if (common->restore_end_ptr != 0 && opcode != OP_SKIP_ARG)12898OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);1289912900if (common->local_quit_available)12901{12902/* Abort match with a fail. 
*/12903if (common->quit_label == NULL)12904add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));12905else12906JUMPTO(SLJIT_JUMP, common->quit_label);12907return;12908}1290912910if (opcode == OP_SKIP_ARG)12911{12912SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);12913OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);12914OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));12915sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));1291612917if (common->restore_end_ptr == 0)12918{12919OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);12920add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));12921return;12922}1292312924jump = CMP(SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0);12925OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);12926OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);12927add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));12928JUMPHERE(jump);12929return;12930}1293112932if (opcode == OP_SKIP)12933OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12934else12935OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);12936add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));12937}1293812939static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)12940{12941DEFINE_COMPILER;12942struct sljit_jump *jump;12943struct sljit_label *label;1294412945OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));12946jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));12947skip_valid_char(common);12948OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);12949JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);1295012951label = LABEL();12952sljit_set_label(jump, label);12953set_jumps(current->own_backtracks, label);12954}1295512956static SLJIT_INLINE void 
compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)12957{12958DEFINE_COMPILER;12959struct sljit_jump *jump;12960int framesize;12961int size;1296212963if (CURRENT_AS(then_trap_backtrack)->then_trap)12964{12965common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;12966return;12967}1296812969size = CURRENT_AS(then_trap_backtrack)->framesize;12970size = 3 + (size < 0 ? 0 : size);1297112972OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));12973free_stack(common, size);12974jump = JUMP(SLJIT_JUMP);1297512976set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());1297712978framesize = CURRENT_AS(then_trap_backtrack)->framesize;12979SLJIT_ASSERT(framesize != 0);1298012981/* STACK_TOP is set by THEN. */12982if (framesize > 0)12983{12984add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));12985OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));12986}12987OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));12988free_stack(common, 3);1298912990JUMPHERE(jump);12991OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);12992}1299312994static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)12995{12996DEFINE_COMPILER;12997then_trap_backtrack *save_then_trap = common->then_trap;1299812999while (current)13000{13001if (current->simple_backtracks != NULL)13002set_jumps(current->simple_backtracks, LABEL());13003switch(*current->cc)13004{13005case OP_SET_SOM:13006OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));13007free_stack(common, 1);13008OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);13009break;1301013011case OP_STAR:13012case OP_MINSTAR:13013case OP_PLUS:13014case OP_MINPLUS:13015case OP_QUERY:13016case OP_MINQUERY:13017case OP_UPTO:13018case OP_MINUPTO:13019case OP_EXACT:13020case OP_POSSTAR:13021case OP_POSPLUS:13022case OP_POSQUERY:13023case OP_POSUPTO:13024case OP_STARI:13025case 
OP_MINSTARI:13026case OP_PLUSI:13027case OP_MINPLUSI:13028case OP_QUERYI:13029case OP_MINQUERYI:13030case OP_UPTOI:13031case OP_MINUPTOI:13032case OP_EXACTI:13033case OP_POSSTARI:13034case OP_POSPLUSI:13035case OP_POSQUERYI:13036case OP_POSUPTOI:13037case OP_NOTSTAR:13038case OP_NOTMINSTAR:13039case OP_NOTPLUS:13040case OP_NOTMINPLUS:13041case OP_NOTQUERY:13042case OP_NOTMINQUERY:13043case OP_NOTUPTO:13044case OP_NOTMINUPTO:13045case OP_NOTEXACT:13046case OP_NOTPOSSTAR:13047case OP_NOTPOSPLUS:13048case OP_NOTPOSQUERY:13049case OP_NOTPOSUPTO:13050case OP_NOTSTARI:13051case OP_NOTMINSTARI:13052case OP_NOTPLUSI:13053case OP_NOTMINPLUSI:13054case OP_NOTQUERYI:13055case OP_NOTMINQUERYI:13056case OP_NOTUPTOI:13057case OP_NOTMINUPTOI:13058case OP_NOTEXACTI:13059case OP_NOTPOSSTARI:13060case OP_NOTPOSPLUSI:13061case OP_NOTPOSQUERYI:13062case OP_NOTPOSUPTOI:13063case OP_TYPESTAR:13064case OP_TYPEMINSTAR:13065case OP_TYPEPLUS:13066case OP_TYPEMINPLUS:13067case OP_TYPEQUERY:13068case OP_TYPEMINQUERY:13069case OP_TYPEUPTO:13070case OP_TYPEMINUPTO:13071case OP_TYPEEXACT:13072case OP_TYPEPOSSTAR:13073case OP_TYPEPOSPLUS:13074case OP_TYPEPOSQUERY:13075case OP_TYPEPOSUPTO:13076/* Since classes has no backtracking path, this13077backtrackingpath was pushed by an iterator. 
*/13078case OP_CLASS:13079case OP_NCLASS:13080#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 813081case OP_XCLASS:13082case OP_ECLASS:13083#endif13084compile_iterator_backtrackingpath(common, current);13085break;1308613087case OP_REF:13088case OP_REFI:13089case OP_DNREF:13090case OP_DNREFI:13091compile_ref_iterator_backtrackingpath(common, current);13092break;1309313094case OP_RECURSE:13095compile_recurse_backtrackingpath(common, current);13096break;1309713098case OP_ASSERT:13099case OP_ASSERT_NOT:13100case OP_ASSERTBACK:13101case OP_ASSERTBACK_NOT:13102compile_assert_backtrackingpath(common, current);13103break;1310413105case OP_ASSERT_NA:13106case OP_ASSERTBACK_NA:13107case OP_ASSERT_SCS:13108case OP_ONCE:13109case OP_SCRIPT_RUN:13110case OP_BRA:13111case OP_CBRA:13112case OP_COND:13113case OP_SBRA:13114case OP_SCBRA:13115case OP_SCOND:13116compile_bracket_backtrackingpath(common, current);13117break;1311813119case OP_BRAZERO:13120if (current->cc[1] > OP_ASSERTBACK_NOT)13121compile_bracket_backtrackingpath(common, current);13122else13123compile_assert_backtrackingpath(common, current);13124break;1312513126case OP_BRAPOS:13127case OP_CBRAPOS:13128case OP_SBRAPOS:13129case OP_SCBRAPOS:13130case OP_BRAPOSZERO:13131compile_bracketpos_backtrackingpath(common, current);13132break;1313313134case OP_BRAMINZERO:13135compile_braminzero_backtrackingpath(common, current);13136break;1313713138case OP_MARK:13139OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));13140if (common->has_skip_arg)13141OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));13142free_stack(common, common->has_skip_arg ? 
5 : 1);13143OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);13144if (common->has_skip_arg)13145OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);13146break;1314713148case OP_THEN:13149case OP_THEN_ARG:13150case OP_PRUNE:13151case OP_PRUNE_ARG:13152case OP_SKIP:13153case OP_SKIP_ARG:13154compile_control_verb_backtrackingpath(common, current);13155break;1315613157case OP_COMMIT:13158case OP_COMMIT_ARG:13159if (common->restore_end_ptr != 0)13160OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->restore_end_ptr);1316113162if (!common->local_quit_available)13163OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);1316413165if (common->quit_label == NULL)13166add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));13167else13168JUMPTO(SLJIT_JUMP, common->quit_label);13169break;1317013171case OP_CALLOUT:13172case OP_CALLOUT_STR:13173case OP_FAIL:13174case OP_ACCEPT:13175case OP_ASSERT_ACCEPT:13176set_jumps(current->own_backtracks, LABEL());13177break;1317813179case OP_VREVERSE:13180compile_vreverse_backtrackingpath(common, current);13181break;1318213183case OP_THEN_TRAP:13184/* A virtual opcode for then traps. */13185compile_then_trap_backtrackingpath(common, current);13186break;1318713188default:13189SLJIT_UNREACHABLE();13190break;13191}13192current = current->prev;13193}13194common->then_trap = save_then_trap;13195}1319613197static SLJIT_INLINE void compile_recurse(compiler_common *common)13198{13199DEFINE_COMPILER;13200PCRE2_SPTR cc = common->start + common->currententry->start;13201PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 
0 : IMM2_SIZE);13202PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);13203uint32_t recurse_flags = 0;13204int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);13205int alt_count, alt_max, local_size;13206backtrack_common altbacktrack;13207jump_list *match = NULL;13208struct sljit_jump *next_alt = NULL;13209struct sljit_jump *accept_exit = NULL;13210struct sljit_label *quit;13211struct sljit_jump *mov_addr = NULL;1321213213/* Recurse captures then. */13214common->then_trap = NULL;1321513216SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);1321713218alt_max = no_alternatives(cc);13219alt_count = 0;1322013221/* Matching path. */13222SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);13223common->currententry->entry_label = LABEL();13224set_jumps(common->currententry->entry_calls, common->currententry->entry_label);1322513226sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);13227count_match(common);1322813229local_size = (alt_max > 1) ? 2 : 1;1323013231/* (Reversed) stack layout:13232[private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */1323313234allocate_stack(common, private_data_size + local_size);13235/* Save return address. */13236OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);1323713238copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);1323913240/* This variable is saved and restored all time when we enter or exit from a recursive context. 
*/13241OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);1324213243if (recurse_flags & recurse_flag_control_head_found)13244OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);1324513246if (alt_max > 1)13247OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);1324813249memset(&altbacktrack, 0, sizeof(backtrack_common));13250common->quit_label = NULL;13251common->accept_label = NULL;13252common->quit = NULL;13253common->accept = NULL;13254altbacktrack.cc = ccbegin;13255cc += GET(cc, 1);13256while (1)13257{13258altbacktrack.top = NULL;13259altbacktrack.own_backtracks = NULL;1326013261if (altbacktrack.cc != ccbegin)13262OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));1326313264compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);13265if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13266return;1326713268allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);13269OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);1327013271if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))13272{13273if (alt_max > 3)13274mov_addr = sljit_emit_op_addr(compiler, SLJIT_MOV_ADDR, SLJIT_MEM1(STACK_TOP), STACK(1));13275else13276OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);13277}1327813279add_jump(compiler, &match, JUMP(SLJIT_JUMP));1328013281if (alt_count == 0)13282{13283/* Backtracking path entry. */13284SLJIT_ASSERT(common->currententry->backtrack_label == NULL);13285common->currententry->backtrack_label = LABEL();13286set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);1328713288sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);1328913290if (recurse_flags & recurse_flag_accept_found)13291accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);1329213293OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));13294/* Save return address. 
*/13295OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);1329613297copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);1329813299if (alt_max > 1)13300{13301OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));13302free_stack(common, 2);1330313304if (alt_max > 3)13305{13306sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);13307sljit_set_label(mov_addr, LABEL());13308sljit_emit_op0(compiler, SLJIT_ENDBR);13309}13310else13311next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);13312}13313else13314free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);13315}13316else if (alt_max > 3)13317{13318sljit_set_label(mov_addr, LABEL());13319sljit_emit_op0(compiler, SLJIT_ENDBR);13320}13321else13322{13323JUMPHERE(next_alt);13324if (alt_count + 1 < alt_max)13325{13326SLJIT_ASSERT(alt_count == 1 && alt_max == 3);13327next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);13328}13329}1333013331alt_count++;1333213333compile_backtrackingpath(common, altbacktrack.top);13334if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13335return;13336set_jumps(altbacktrack.own_backtracks, LABEL());1333713338if (*cc != OP_ALT)13339break;1334013341altbacktrack.cc = cc + 1 + LINK_SIZE;13342cc += GET(cc, 1);13343}1334413345/* No alternative is matched. 
*/1334613347quit = LABEL();1334813349copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);1335013351OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));13352free_stack(common, private_data_size + local_size);13353OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);13354OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);1335513356if (common->quit != NULL)13357{13358SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);1335913360set_jumps(common->quit, LABEL());13361OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);13362copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);13363JUMPTO(SLJIT_JUMP, quit);13364}1336513366if (recurse_flags & recurse_flag_accept_found)13367{13368JUMPHERE(accept_exit);13369free_stack(common, 2);1337013371/* Save return address. */13372OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);1337313374copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);1337513376OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));13377free_stack(common, private_data_size + local_size);13378OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);13379OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);13380}1338113382if (common->accept != NULL)13383{13384SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);1338513386set_jumps(common->accept, LABEL());1338713388OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);13389OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);1339013391allocate_stack(common, 2);13392OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);13393}1339413395set_jumps(match, LABEL());1339613397OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);1339813399copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, 
recurse_flags);1340013401OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));13402OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);13403OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);13404}1340513406#undef COMPILE_BACKTRACKINGPATH13407#undef CURRENT_AS1340813409#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \13410(PCRE2_JIT_INVALID_UTF)1341113412static int jit_compile(pcre2_code *code, sljit_u32 mode)13413{13414pcre2_real_code *re = (pcre2_real_code *)code;13415struct sljit_compiler *compiler;13416backtrack_common rootbacktrack;13417compiler_common common_data;13418compiler_common *common = &common_data;13419const sljit_u8 *tables = re->tables;13420void *allocator_data = &re->memctl;13421int private_data_size;13422PCRE2_SPTR ccend;13423executable_functions *functions;13424void *executable_func;13425sljit_uw executable_size, private_data_length, total_length;13426struct sljit_label *mainloop_label = NULL;13427struct sljit_label *continue_match_label;13428struct sljit_label *empty_match_found_label = NULL;13429struct sljit_label *empty_match_backtrack_label = NULL;13430struct sljit_label *reset_match_label;13431struct sljit_label *quit_label;13432struct sljit_jump *jump;13433struct sljit_jump *minlength_check_failed = NULL;13434struct sljit_jump *empty_match = NULL;13435struct sljit_jump *end_anchor_failed = NULL;13436jump_list *reqcu_not_found = NULL;1343713438SLJIT_ASSERT(tables);1343913440#if HAS_VIRTUAL_REGISTERS == 113441SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);13442#elif HAS_VIRTUAL_REGISTERS == 013443SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);13444#else13445#error "Invalid value for HAS_VIRTUAL_REGISTERS"13446#endif1344713448memset(&rootbacktrack, 0, 
sizeof(backtrack_common));13449memset(common, 0, sizeof(compiler_common));13450common->re = re;13451common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));13452rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);1345313454#ifdef SUPPORT_UNICODE13455common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;13456#endif /* SUPPORT_UNICODE */13457mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;1345813459common->start = rootbacktrack.cc;13460common->read_only_data_head = NULL;13461common->fcc = tables + fcc_offset;13462common->lcc = (sljit_sw)(tables + lcc_offset);13463common->mode = mode;13464common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);13465common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);13466common->nltype = NLTYPE_FIXED;13467switch(re->newline_convention)13468{13469case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;13470case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;13471case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;13472case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;13473case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;13474case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;13475default: return PCRE2_ERROR_INTERNAL;13476}13477common->nlmax = READ_CHAR_MAX;13478common->nlmin = 0;13479if (re->bsr_convention == PCRE2_BSR_UNICODE)13480common->bsr_nltype = NLTYPE_ANY;13481else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)13482common->bsr_nltype = NLTYPE_ANYCRLF;13483else13484{13485#ifdef BSR_ANYCRLF13486common->bsr_nltype = NLTYPE_ANYCRLF;13487#else13488common->bsr_nltype = NLTYPE_ANY;13489#endif13490}13491common->bsr_nlmax = READ_CHAR_MAX;13492common->bsr_nlmin = 0;13493common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;13494common->ctypes = (sljit_sw)(tables + 
ctypes_offset);13495common->name_count = re->name_count;13496common->name_entry_size = re->name_entry_size;13497common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;13498common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;13499#ifdef SUPPORT_UNICODE13500/* PCRE2_UTF[16|32] have the same value as PCRE2_UTF8. */13501common->utf = (re->overall_options & PCRE2_UTF) != 0;13502common->ucp = (re->overall_options & PCRE2_UCP) != 0;13503if (common->utf)13504{13505if (common->nltype == NLTYPE_ANY)13506common->nlmax = 0x2029;13507else if (common->nltype == NLTYPE_ANYCRLF)13508common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;13509else13510{13511/* We only care about the first newline character. */13512common->nlmax = common->newline & 0xff;13513}1351413515if (common->nltype == NLTYPE_FIXED)13516common->nlmin = common->newline & 0xff;13517else13518common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;1351913520if (common->bsr_nltype == NLTYPE_ANY)13521common->bsr_nlmax = 0x2029;13522else13523common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;13524common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;13525}13526else13527common->invalid_utf = FALSE;13528#endif /* SUPPORT_UNICODE */13529ccend = bracketend(common->start);1353013531/* Calculate the local space size on the stack. */13532common->ovector_start = LOCAL0;13533/* Allocate space for temporary data structures. */13534private_data_length = ccend - common->start;13535/* The chance of overflow is very low, but might happen on 32 bit. */13536if (private_data_length > ~(sljit_uw)0 / sizeof(sljit_s32))13537return PCRE2_ERROR_NOMEMORY;1353813539private_data_length *= sizeof(sljit_s32);13540/* Align to 32 bit. 
*/13541common->cbracket_bitset_length = ((re->top_bracket + 1) + (sljit_u32)7) & ~(sljit_u32)7;13542total_length = common->cbracket_bitset_length << 1;13543if (~(sljit_uw)0 - private_data_length < total_length)13544return PCRE2_ERROR_NOMEMORY;1354513546total_length += private_data_length;13547common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length, allocator_data);13548if (!common->private_data_ptrs)13549return PCRE2_ERROR_NOMEMORY;1355013551memset(common->private_data_ptrs, 0, private_data_length);13552common->optimized_cbrackets = ((sljit_u8 *)common->private_data_ptrs) + private_data_length;13553#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 113554memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);13555#else13556memset(common->optimized_cbrackets, 0xff, common->cbracket_bitset_length);13557#endif13558common->cbracket_bitset = common->optimized_cbrackets + common->cbracket_bitset_length;1355913560SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);13561#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 213562common->capture_last_ptr = common->ovector_start;13563common->ovector_start += sizeof(sljit_sw);13564#endif13565if (!check_opcode_types(common, common->start, ccend))13566{13567SLJIT_FREE(common->private_data_ptrs, allocator_data);13568return PCRE2_ERROR_JIT_UNSUPPORTED;13569}1357013571/* Checking flags and updating ovector_start. 
*/13572if (mode == PCRE2_JIT_COMPLETE &&13573(re->flags & PCRE2_LASTSET) != 0 &&13574(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13575{13576common->req_char_ptr = common->ovector_start;13577common->ovector_start += sizeof(sljit_sw);13578}1357913580if (mode != PCRE2_JIT_COMPLETE)13581{13582common->start_used_ptr = common->ovector_start;13583common->ovector_start += sizeof(sljit_sw);13584if (mode == PCRE2_JIT_PARTIAL_SOFT)13585{13586common->hit_start = common->ovector_start;13587common->ovector_start += sizeof(sljit_sw);13588}13589}1359013591if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)13592{13593common->match_end_ptr = common->ovector_start;13594common->ovector_start += sizeof(sljit_sw);13595}1359613597#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD13598common->control_head_ptr = 1;13599#endif1360013601if (common->control_head_ptr != 0)13602{13603common->control_head_ptr = common->ovector_start;13604common->ovector_start += sizeof(sljit_sw);13605}1360613607if (common->has_set_som)13608{13609/* Saving the real start pointer is necessary. */13610common->start_ptr = common->ovector_start;13611common->ovector_start += sizeof(sljit_sw);13612}1361313614/* Aligning ovector to even number of sljit words. */13615if ((common->ovector_start & sizeof(sljit_sw)) != 0)13616common->ovector_start += sizeof(sljit_sw);1361713618if (common->start_ptr == 0)13619common->start_ptr = OVECTOR(0);1362013621/* Capturing brackets cannot be optimized if callouts are allowed. 
*/13622if (common->capture_last_ptr != 0)13623memset(common->optimized_cbrackets, 0, common->cbracket_bitset_length);1362413625SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));13626common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);13627private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);1362813629if ((re->overall_options & PCRE2_ANCHORED) == 0 &&13630(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&13631!common->has_skip_in_assert_back)13632detect_early_fail(common, common->start, &private_data_size, 0, 0);1363313634set_private_data_ptrs(common, &private_data_size, ccend);1363513636SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);1363713638if (private_data_size > 65536)13639{13640SLJIT_FREE(common->private_data_ptrs, allocator_data);13641return PCRE2_ERROR_JIT_UNSUPPORTED;13642}1364313644if (common->has_then)13645{13646total_length = ccend - common->start;13647common->then_offsets = (sljit_u8 *)SLJIT_MALLOC(total_length, allocator_data);13648if (!common->then_offsets)13649{13650SLJIT_FREE(common->private_data_ptrs, allocator_data);13651return PCRE2_ERROR_NOMEMORY;13652}13653memset(common->then_offsets, 0, total_length);13654set_then_offsets(common, common->start, NULL);13655}1365613657compiler = sljit_create_compiler(allocator_data);13658if (!compiler)13659{13660SLJIT_FREE(common->private_data_ptrs, allocator_data);13661if (common->has_then)13662SLJIT_FREE(common->then_offsets, allocator_data);13663return PCRE2_ERROR_NOMEMORY;13664}13665common->compiler = compiler;1366613667/* Main pcre2_jit_exec entry. */13668SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);13669sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS), 5, private_data_size);1367013671/* Register init. 
*/13672reset_ovector(common, (re->top_bracket + 1) * 2);13673if (common->req_char_ptr != 0)13674OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);1367513676OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);13677OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);13678OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));13679OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));13680OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));13681OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));13682OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));13683OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));13684OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);13685OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);1368613687if (common->early_fail_start_ptr < common->early_fail_end_ptr)13688reset_early_fail(common);1368913690if (mode == PCRE2_JIT_PARTIAL_SOFT)13691OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);13692if (common->mark_ptr != 0)13693OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);13694if (common->control_head_ptr != 0)13695OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);1369613697/* Main part of the matching */13698if ((re->overall_options & PCRE2_ANCHORED) == 0)13699{13700mainloop_label = mainloop_entry(common);13701continue_match_label = LABEL();13702/* Forward search if possible. 
*/13703if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13704{13705if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))13706;13707else if ((re->flags & PCRE2_FIRSTSET) != 0)13708fast_forward_first_char(common);13709else if ((re->flags & PCRE2_STARTLINE) != 0)13710fast_forward_newline(common);13711else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)13712fast_forward_start_bits(common);13713}13714}13715else13716continue_match_label = LABEL();1371713718if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 &&13719(re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)13720{13721OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13722OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));13723minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);13724}13725if (common->req_char_ptr != 0)13726reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);1372713728/* Store the current STR_PTR in OVECTOR(0). */13729OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);13730/* Copy the limit of allowed recursions. */13731OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);13732if (common->capture_last_ptr != 0)13733OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);13734if (common->fast_forward_bc_ptr != NULL)13735OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);1373613737if (common->start_ptr != OVECTOR(0))13738OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);1373913740/* Copy the beginning of the string. 
*/13741if (mode == PCRE2_JIT_PARTIAL_SOFT)13742{13743jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);13744OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);13745JUMPHERE(jump);13746}13747else if (mode == PCRE2_JIT_PARTIAL_HARD)13748OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);1374913750compile_matchingpath(common, common->start, ccend, &rootbacktrack);13751if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13752{13753sljit_free_compiler(compiler);13754SLJIT_FREE(common->private_data_ptrs, allocator_data);13755if (common->has_then)13756SLJIT_FREE(common->then_offsets, allocator_data);13757PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13758return PCRE2_ERROR_NOMEMORY;13759}1376013761if ((re->overall_options & PCRE2_ENDANCHORED) != 0)13762end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);1376313764if (common->might_be_empty)13765{13766empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));13767empty_match_found_label = LABEL();13768}1376913770common->accept_label = LABEL();13771if (common->accept != NULL)13772set_jumps(common->accept, common->accept_label);1377313774/* Fail if we detect that the start position was moved to be either after13775the end position (\K in lookahead) or before the start offset (\K in13776lookbehind). */1377713778if (common->has_set_som &&13779(common->re->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)13780{13781if (HAS_VIRTUAL_REGISTERS)13782{13783OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);13784OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));13785}13786else13787{13788OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));13789}13790OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));1379113792/* (ovector[0] < jit_arguments->str)? 
*/13793OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, TMP1, 0);13794/* Unconditionally set R0 (aka TMP1), in between the comparison that needs to13795use TMP1, but before the jump. */13796OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_BAD_BACKSLASH_K);13797add_jump(compiler, &common->abort, JUMP(SLJIT_LESS));13798/* (ovector[0] > STR_PTR)? NB. ovector[1] hasn't yet been set to STR_PTR. */13799add_jump(compiler, &common->abort, CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0));13800}1380113802/* This means we have a match. Update the ovector. */13803copy_ovector(common, re->top_bracket + 1);13804common->quit_label = common->abort_label = LABEL();13805if (common->quit != NULL)13806set_jumps(common->quit, common->quit_label);13807if (common->abort != NULL)13808set_jumps(common->abort, common->abort_label);13809if (minlength_check_failed != NULL)13810SET_LABEL(minlength_check_failed, common->abort_label);1381113812sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);13813sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);1381413815if (common->failed_match != NULL)13816{13817SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);13818set_jumps(common->failed_match, LABEL());13819OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13820JUMPTO(SLJIT_JUMP, common->abort_label);13821}1382213823if ((re->overall_options & PCRE2_ENDANCHORED) != 0)13824JUMPHERE(end_anchor_failed);1382513826if (mode != PCRE2_JIT_COMPLETE)13827{13828common->partialmatchlabel = LABEL();13829set_jumps(common->partialmatch, common->partialmatchlabel);13830return_with_partial_match(common, common->quit_label);13831}1383213833if (common->might_be_empty)13834empty_match_backtrack_label = LABEL();13835compile_backtrackingpath(common, rootbacktrack.top);13836if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13837{13838sljit_free_compiler(compiler);13839SLJIT_FREE(common->private_data_ptrs, allocator_data);13840if (common->has_then)13841SLJIT_FREE(common->then_offsets, 
allocator_data);13842PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13843return PCRE2_ERROR_NOMEMORY;13844}1384513846SLJIT_ASSERT(rootbacktrack.prev == NULL);13847reset_match_label = LABEL();1384813849if (mode == PCRE2_JIT_PARTIAL_SOFT)13850{13851/* Update hit_start only in the first time. */13852jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);13853OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);13854OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);13855OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);13856JUMPHERE(jump);13857}1385813859/* Check we have remaining characters. */13860if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)13861{13862OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);13863}1386413865OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),13866(common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);1386713868if ((re->overall_options & PCRE2_ANCHORED) == 0)13869{13870if (common->ff_newline_shortcut != NULL)13871{13872/* There cannot be more newlines if PCRE2_FIRSTLINE is set. */13873if ((re->overall_options & PCRE2_FIRSTLINE) == 0)13874{13875if (common->match_end_ptr != 0)13876{13877OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);13878OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);13879CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);13880OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);13881}13882else13883CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);13884}13885}13886else13887CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);13888}1388913890/* No more remaining characters. 
*/13891if (reqcu_not_found != NULL)13892set_jumps(reqcu_not_found, LABEL());1389313894if (mode == PCRE2_JIT_PARTIAL_SOFT)13895CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);1389613897OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);13898JUMPTO(SLJIT_JUMP, common->quit_label);1389913900flush_stubs(common);1390113902if (common->might_be_empty)13903{13904JUMPHERE(empty_match);13905OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);13906OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));13907OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);13908JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);13909OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);13910JUMPTO(SLJIT_ZERO, empty_match_found_label);13911OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));13912CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);13913JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);13914}1391513916common->fast_forward_bc_ptr = NULL;13917common->early_fail_start_ptr = 0;13918common->early_fail_end_ptr = 0;13919common->currententry = common->entries;13920common->local_quit_available = TRUE;13921quit_label = common->quit_label;13922SLJIT_ASSERT(common->restore_end_ptr == 0);1392313924if (common->currententry != NULL)13925{13926/* A free bit for each private data. */13927common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;13928SLJIT_ASSERT(common->recurse_bitset_size > 0);13929common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;1393013931if (common->recurse_bitset != NULL)13932{13933do13934{13935/* Might add new entries. 
*/13936compile_recurse(common);13937if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))13938break;13939flush_stubs(common);13940common->currententry = common->currententry->next;13941}13942while (common->currententry != NULL);1394313944SLJIT_FREE(common->recurse_bitset, allocator_data);13945}1394613947if (common->currententry != NULL)13948{13949/* The common->recurse_bitset has been freed. */13950SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);1395113952sljit_free_compiler(compiler);13953SLJIT_FREE(common->private_data_ptrs, allocator_data);13954if (common->has_then)13955SLJIT_FREE(common->then_offsets, allocator_data);13956PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);13957return PCRE2_ERROR_NOMEMORY;13958}13959}1396013961common->local_quit_available = FALSE;13962common->quit_label = quit_label;13963SLJIT_ASSERT(common->restore_end_ptr == 0);1396413965/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if fails. */13966/* This is a (really) rare case. */13967set_jumps(common->stackalloc, LABEL());13968/* RETURN_ADDR is not a saved register. 
*/13969SLJIT_ASSERT(common->locals_size >= 2 * SSIZE_OF(sw));13970sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCAL0);1397113972SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);1397313974OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, STR_PTR, 0);13975OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);13976OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);13977OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));13978OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);1397913980sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));1398113982jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);13983OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);13984OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);13985OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);13986OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);13987OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);1398813989/* Allocation failed. */13990JUMPHERE(jump);13991/* We break the return address cache here, but this is a really rare case. */13992OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);13993JUMPTO(SLJIT_JUMP, common->quit_label);1399413995/* Call limit reached. 
*/13996set_jumps(common->calllimit, LABEL());13997OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);13998JUMPTO(SLJIT_JUMP, common->quit_label);1399914000if (common->revertframes != NULL)14001{14002set_jumps(common->revertframes, LABEL());14003do_revertframes(common);14004}14005if (common->wordboundary != NULL)14006{14007set_jumps(common->wordboundary, LABEL());14008check_wordboundary(common, FALSE);14009}14010if (common->ucp_wordboundary != NULL)14011{14012set_jumps(common->ucp_wordboundary, LABEL());14013check_wordboundary(common, TRUE);14014}14015if (common->anynewline != NULL)14016{14017set_jumps(common->anynewline, LABEL());14018check_anynewline(common);14019}14020if (common->hspace != NULL)14021{14022set_jumps(common->hspace, LABEL());14023check_hspace(common);14024}14025if (common->vspace != NULL)14026{14027set_jumps(common->vspace, LABEL());14028check_vspace(common);14029}14030if (common->casefulcmp != NULL)14031{14032set_jumps(common->casefulcmp, LABEL());14033do_casefulcmp(common);14034}14035if (common->caselesscmp != NULL)14036{14037set_jumps(common->caselesscmp, LABEL());14038do_caselesscmp(common);14039}14040if (common->reset_match != NULL || common->restart_match != NULL)14041{14042if (common->restart_match != NULL)14043{14044set_jumps(common->restart_match, LABEL());14045OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);14046}1404714048set_jumps(common->reset_match, LABEL());14049do_reset_match(common, (re->top_bracket + 1) * 2);14050/* The value of restart_match is in TMP1. 
*/14051CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);14052OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);14053JUMPTO(SLJIT_JUMP, reset_match_label);14054}14055#ifdef SUPPORT_UNICODE14056#if PCRE2_CODE_UNIT_WIDTH == 814057if (common->utfreadchar != NULL)14058{14059set_jumps(common->utfreadchar, LABEL());14060do_utfreadchar(common);14061}14062if (common->utfreadtype8 != NULL)14063{14064set_jumps(common->utfreadtype8, LABEL());14065do_utfreadtype8(common);14066}14067if (common->utfpeakcharback != NULL)14068{14069set_jumps(common->utfpeakcharback, LABEL());14070do_utfpeakcharback(common);14071}14072#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */14073#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 1614074if (common->utfreadchar_invalid != NULL)14075{14076set_jumps(common->utfreadchar_invalid, LABEL());14077do_utfreadchar_invalid(common);14078}14079if (common->utfreadnewline_invalid != NULL)14080{14081set_jumps(common->utfreadnewline_invalid, LABEL());14082do_utfreadnewline_invalid(common);14083}14084if (common->utfmoveback_invalid)14085{14086set_jumps(common->utfmoveback_invalid, LABEL());14087do_utfmoveback_invalid(common);14088}14089if (common->utfpeakcharback_invalid)14090{14091set_jumps(common->utfpeakcharback_invalid, LABEL());14092do_utfpeakcharback_invalid(common);14093}14094#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */14095if (common->getucd != NULL)14096{14097set_jumps(common->getucd, LABEL());14098do_getucd(common);14099}14100if (common->getucdtype != NULL)14101{14102set_jumps(common->getucdtype, LABEL());14103do_getucdtype(common);14104}14105#endif /* SUPPORT_UNICODE */1410614107SLJIT_FREE(common->private_data_ptrs, allocator_data);14108if (common->has_then)14109SLJIT_FREE(common->then_offsets, allocator_data);1411014111executable_func = sljit_generate_code(compiler, 0, NULL);14112executable_size = sljit_get_generated_code_size(compiler);14113sljit_free_compiler(compiler);1411414115if (executable_func == 
NULL)14116{14117PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);14118return PCRE2_ERROR_NOMEMORY;14119}1412014121/* Reuse the function descriptor if possible. */14122if (re->executable_jit != NULL)14123functions = (executable_functions *)re->executable_jit;14124else14125{14126functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);14127if (functions == NULL)14128{14129/* This case is highly unlikely since we just recently14130freed a lot of memory. Not impossible though. */14131sljit_free_code(executable_func, NULL);14132PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);14133return PCRE2_ERROR_NOMEMORY;14134}14135memset(functions, 0, sizeof(executable_functions));14136functions->top_bracket = re->top_bracket + 1;14137functions->limit_match = re->limit_match;14138re->executable_jit = functions;14139}1414014141/* Turn mode into an index. */14142if (mode == PCRE2_JIT_COMPLETE)14143mode = 0;14144else14145mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 
1 : 2;1414614147SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);14148functions->executable_funcs[mode] = executable_func;14149functions->read_only_data_heads[mode] = common->read_only_data_head;14150functions->executable_sizes[mode] = executable_size;14151return 0;14152}1415314154#endif1415514156/*************************************************14157* JIT compile a Regular Expression *14158*************************************************/1415914160/* This function used JIT to convert a previously-compiled pattern into machine14161code.1416214163Arguments:14164code a compiled pattern14165options JIT option bits1416614167Returns: 0: success or (*NOJIT) was used14168<0: an error code14169*/1417014171#define PUBLIC_JIT_COMPILE_OPTIONS \14172(PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)1417314174PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION14175pcre2_jit_compile(pcre2_code *code, uint32_t options)14176{14177pcre2_real_code *re = (pcre2_real_code *)code;14178#ifdef SUPPORT_JIT14179void *exec_memory;14180executable_functions *functions;14181static int executable_allocator_is_working = -1;1418214183if (executable_allocator_is_working == -1)14184{14185/* Checks whether the executable allocator is working. This check14186might run multiple times in multi-threaded environments, but the14187result should not be affected by it. */14188exec_memory = SLJIT_MALLOC_EXEC(32, NULL);14189if (exec_memory != NULL)14190{14191SLJIT_FREE_EXEC(((sljit_u8*)(exec_memory)) + SLJIT_EXEC_OFFSET(exec_memory), NULL);14192executable_allocator_is_working = 1;14193}14194else executable_allocator_is_working = 0;14195}14196#endif1419714198if (options & PCRE2_JIT_TEST_ALLOC)14199{14200if (options != PCRE2_JIT_TEST_ALLOC)14201return PCRE2_ERROR_JIT_BADOPTION;1420214203#ifdef SUPPORT_JIT14204return executable_allocator_is_working ? 
0 : PCRE2_ERROR_NOMEMORY;14205#else14206return PCRE2_ERROR_JIT_UNSUPPORTED;14207#endif14208}1420914210if (code == NULL)14211return PCRE2_ERROR_NULL;1421214213if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)14214return PCRE2_ERROR_JIT_BADOPTION;1421514216/* Support for invalid UTF was first introduced in JIT, with the option14217PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the14218compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the14219preferred feature, with the earlier option deprecated. However, for backward14220compatibility, if the earlier option is set, it forces the new option so that14221if JIT matching falls back to the interpreter, there is still support for14222invalid UTF. However, if this function has already been successfully called14223without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that14224non-invalid-supporting JIT code was compiled), give an error.1422514226If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following14227actions are needed:14228142291. Remove the definition from pcre2.h.in and from the list in14230PUBLIC_JIT_COMPILE_OPTIONS above.14231142322. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.14233142343. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.14235142364. Delete the following short block of code. 
The setting of "re" and14237"functions" can be moved into the JIT-only block below, but if that is14238done, (void)re and (void)functions will be needed in the non-JIT case, to14239avoid compiler warnings.14240*/1424114242#ifdef SUPPORT_JIT14243functions = (executable_functions *)re->executable_jit;14244#endif1424514246if ((options & PCRE2_JIT_INVALID_UTF) != 0)14247{14248if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)14249{14250#ifdef SUPPORT_JIT14251if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;14252#endif14253re->overall_options |= PCRE2_MATCH_INVALID_UTF;14254}14255}1425614257/* The above tests are run with and without JIT support. This means that14258PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring14259interpreter support) even in the absence of JIT. But now, if there is no JIT14260support, give an error return. */1426114262#ifndef SUPPORT_JIT14263return PCRE2_ERROR_JIT_BADOPTION;14264#else /* SUPPORT_JIT */1426514266/* There is JIT support. Do the necessary. 
*/1426714268if ((re->flags & PCRE2_NOJIT) != 0) return 0;1426914270if (!executable_allocator_is_working)14271return PCRE2_ERROR_NOMEMORY;1427214273if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)14274options |= PCRE2_JIT_INVALID_UTF;1427514276if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL14277|| functions->executable_funcs[0] == NULL)) {14278uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);14279int result = jit_compile(code, options & ~excluded_options);14280if (result != 0)14281return result;14282}1428314284if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL14285|| functions->executable_funcs[1] == NULL)) {14286uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);14287int result = jit_compile(code, options & ~excluded_options);14288if (result != 0)14289return result;14290}1429114292if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL14293|| functions->executable_funcs[2] == NULL)) {14294uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);14295int result = jit_compile(code, options & ~excluded_options);14296if (result != 0)14297return result;14298}1429914300return 0;1430114302#endif /* SUPPORT_JIT */14303}1430414305/* JIT compiler uses an all-in-one approach. This improves security,14306since the code generator functions are not exported. */1430714308#define INCLUDED_FROM_PCRE2_JIT_COMPILE1430914310#include "pcre2_jit_match_inc.h"14311#include "pcre2_jit_misc_inc.h"1431214313/* End of pcre2_jit_compile.c */143141431514316