/* Source: GitHub repository godotengine/godot,
   path: blob/master/thirdparty/pcre2/src/pcre2_jit_simd_inc.h */
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
Written by Philip Hazel
9
This module by Zoltan Herczeg
10
Original API code Copyright (c) 1997-2012 University of Cambridge
11
New API code Copyright (c) 2016-2019 University of Cambridge
12
13
-----------------------------------------------------------------------------
14
Redistribution and use in source and binary forms, with or without
15
modification, are permitted provided that the following conditions are met:
16
17
* Redistributions of source code must retain the above copyright notice,
18
this list of conditions and the following disclaimer.
19
20
* Redistributions in binary form must reproduce the above copyright
21
notice, this list of conditions and the following disclaimer in the
22
documentation and/or other materials provided with the distribution.
23
24
* Neither the name of the University of Cambridge nor the names of its
25
contributors may be used to endorse or promote products derived from
26
this software without specific prior written permission.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
-----------------------------------------------------------------------------
40
*/
41
42
#if !(defined SUPPORT_VALGRIND)
43
44
#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
45
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
46
|| (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
47
48
/* Selects how fast_forward_char_pair_sse2_compare() matches the loaded data
   against the searched character(s). */
typedef enum {
  /* Compare the data with a single character vector. */
  vector_compare_match1,
  /* OR a bit mask into the data first, then compare with a single character
     vector (used when the two characters differ in exactly one bit). */
  vector_compare_match1i,
  /* Compare the data with two character vectors and OR the two results. */
  vector_compare_match2,
} vector_compare_type;
53
54
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
55
/* Returns the largest distance, in code units, supported between the two
   characters of a character pair search (x86 variant). The returned values
   are one less than the number of code units which fit into 16 bytes. The
   larger AVX2 limits are kept below as comments because that code path is
   currently disabled. */
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
/* The AVX2 code path is currently disabled. */
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 31 : 15; */
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* The AVX2 code path is currently disabled. */
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 15 : 7; */
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
/* The AVX2 code path is currently disabled. */
/* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 7 : 3; */
return 3;
#else
#error "Unsupported unit width"
#endif
}
73
#else /* !SLJIT_CONFIG_X86 */
74
/* Returns the largest distance, in code units, supported between the two
   characters of a character pair search (non-x86 variant). The returned
   values are one less than the number of code units which fit into
   16 bytes. */
static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}
86
#endif /* SLJIT_CONFIG_X86 */
87
88
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
89
/* Emits code which jumps when the code unit stored in reg is NOT a trailing
   unit of a multi-unit character, i.e. not a UTF-8 continuation byte
   (10xxxxxx) in 8 bit mode and not a UTF-16 low surrogate (0xdc00-0xdfff)
   in 16 bit mode.

   compiler  the sljit compiler which receives the instructions
   reg       register holding the code unit; it is overwritten by the masking

   Returns the emitted (not yet bound) jump. */
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
#else
#error "Unknown code width"
#endif
}
101
#endif
102
103
#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */
104
105
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
106
107
/* Replicates a code unit so that it fills a whole 32 bit value: four copies
   in 8 bit mode, two copies in 16 bit mode, one copy in 32 bit mode.

   The preprocessor branches also define SIMD_COMPARE_TYPE_INDEX (0, 1 or 2)
   for the rest of the x86 section; it is added to the 0x74 opcode byte to
   select between PCMPEQB, PCMPEQW and PCMPEQD. */
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
/* Unsigned arithmetic is used, so the shifts below cannot overflow. */
sljit_u32 value = chr;
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SIMD_COMPARE_TYPE_INDEX 0
return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SIMD_COMPARE_TYPE_INDEX 1
return (sljit_s32)((value << 16) | value);
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SIMD_COMPARE_TYPE_INDEX 2
return (sljit_s32)(value);
#else
#error "Unsupported unit width"
#endif
}
123
124
/* Emits one step of the vector comparison sequence. The callers in this file
   invoke it four times with step = 0, 1, 2, 3; depending on compare_type and
   reg_type some of the steps emit nothing.

   compiler      the sljit compiler which receives the raw instruction bytes
   compare_type  matching method, see vector_compare_type
   reg_type      SLJIT_SIMD_REG_128 selects the 0x66 0x0f encoding, any other
                 value selects the two byte VEX encoding
   step          step of the sequence to emit, must be in the 0..3 range
   dst_ind       machine index of the register which holds the data and
                 receives the comparison result
   cmp1_ind      machine index of the first character vector
   cmp2_ind      machine index of the second character vector, or of the
                 bit mask when compare_type is vector_compare_match1i
   tmp_ind       machine index of a scratch register, only used when
                 compare_type is vector_compare_match2

   Emitted sequences for 128 bit registers:
     match1:   step 2: dst = (dst == cmp1)
     match1i:  step 0: dst |= cmp2, step 2: dst = (dst == cmp1)
     match2:   step 0: tmp = dst, step 1: dst = (dst == cmp1),
               step 2: tmp = (tmp == cmp2), step 3: dst |= tmp

   NOTE(review): the indexes are placed directly into the ModRM byte and no
   REX / three byte VEX prefix is produced, so they presumably must be below
   8; this is not asserted here -- confirm at the callers. */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 instruction[4];

if (reg_type == SLJIT_SIMD_REG_128)
  {
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  }
else
  {
  /* Two byte VEX prefix. */
  instruction[0] = 0xc5;
  instruction[1] = 0xfd;
  }

SLJIT_ASSERT(step >= 0 && step <= 3);

if (compare_type != vector_compare_match2)
  {
  if (step == 0)
    {
    if (compare_type == vector_compare_match1i)
      {
      /* POR xmm1, xmm2/m128 */
      if (reg_type == SLJIT_SIMD_REG_256)
        instruction[1] ^= (dst_ind << 3);

      /* Prefix is filled. */
      instruction[2] = 0xeb;
      instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
      sljit_emit_op_custom(compiler, instruction, 4);
      }
    return;
    }

  /* Steps 1 and 3 are no-ops for the single vector compare types. */
  if (step != 2)
    return;

  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  if (reg_type == SLJIT_SIMD_REG_256)
    instruction[1] ^= (dst_ind << 3);

  /* Prefix is filled. */
  instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  return;
  }

/* vector_compare_match2 from here on. */
if (reg_type == SLJIT_SIMD_REG_256)
  {
  if (step == 2)
    return;

  /* With the VEX encoding the register copy of step 0 is not emitted:
     step 0 is executed as step 2 with dst_ind encoded into the prefix. */
  if (step == 0)
    {
    step = 2;
    instruction[1] ^= (dst_ind << 3);
    }
  }

switch (step)
  {
  case 0:
  SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128);

  /* MOVDQA xmm1, xmm2/m128 */
  /* Prefix is filled. */
  instruction[2] = 0x6f;
  instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  return;

  case 1:
  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  if (reg_type == SLJIT_SIMD_REG_256)
    instruction[1] ^= (dst_ind << 3);

  /* Prefix is filled. */
  instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  return;

  case 2:
  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  /* Prefix is filled. */
  instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
  instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  return;

  case 3:
  /* POR xmm1, xmm2/m128 */
  if (reg_type == SLJIT_SIMD_REG_256)
    instruction[1] ^= (dst_ind << 3);

  /* Prefix is filled. */
  instruction[2] = 0xeb;
  instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
  sljit_emit_op_custom(compiler, instruction, 4);
  return;
  }
}
230
231
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
232
233
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
234
{
235
DEFINE_COMPILER;
236
sljit_u8 instruction[8];
237
/* The AVX2 code path is currently disabled. */
238
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
239
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
240
sljit_s32 value;
241
struct sljit_label *start;
242
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
243
struct sljit_label *restart;
244
#endif
245
struct sljit_jump *quit;
246
struct sljit_jump *partial_quit[2];
247
vector_compare_type compare_type = vector_compare_match1;
248
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
249
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
250
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
251
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
252
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
253
sljit_u32 bit = 0;
254
int i;
255
256
SLJIT_UNUSED_ARG(offset);
257
258
if (char1 != char2)
259
{
260
bit = char1 ^ char2;
261
compare_type = vector_compare_match1i;
262
263
if (!is_powerof2(bit))
264
{
265
bit = 0;
266
compare_type = vector_compare_match2;
267
}
268
}
269
270
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
271
if (common->mode == PCRE2_JIT_COMPLETE)
272
add_jump(compiler, &common->failed_match, partial_quit[0]);
273
274
/* First part (unaligned start) */
275
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
276
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
277
278
if (char1 != char2)
279
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
280
281
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
282
283
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
284
285
if (char1 != char2)
286
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
287
288
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
289
restart = LABEL();
290
#endif
291
292
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
293
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
294
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
295
296
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
297
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
298
299
for (i = 0; i < 4; i++)
300
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
301
302
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
303
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
304
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
305
306
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
307
308
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
309
310
/* Second part (aligned) */
311
start = LABEL();
312
313
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
314
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
315
316
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
317
if (common->mode == PCRE2_JIT_COMPLETE)
318
add_jump(compiler, &common->failed_match, partial_quit[1]);
319
320
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
321
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
322
for (i = 0; i < 4; i++)
323
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
324
325
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
326
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
327
328
JUMPHERE(quit);
329
330
SLJIT_ASSERT(tmp1_reg_ind < 8);
331
/* BSF r32, r/m32 */
332
instruction[0] = 0x0f;
333
instruction[1] = 0xbc;
334
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
335
sljit_emit_op_custom(compiler, instruction, 3);
336
337
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
338
339
if (common->mode != PCRE2_JIT_COMPLETE)
340
{
341
JUMPHERE(partial_quit[0]);
342
JUMPHERE(partial_quit[1]);
343
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
344
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
345
}
346
else
347
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
348
349
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
350
if (common->utf && offset > 0)
351
{
352
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
353
354
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
355
356
quit = jump_if_utf_char_start(compiler, TMP1);
357
358
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
359
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
360
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
361
JUMPTO(SLJIT_JUMP, restart);
362
363
JUMPHERE(quit);
364
}
365
#endif
366
}
367
368
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
369
370
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
371
{
372
DEFINE_COMPILER;
373
sljit_u8 instruction[8];
374
/* The AVX2 code path is currently disabled. */
375
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
376
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
377
sljit_s32 value;
378
struct sljit_label *start;
379
struct sljit_jump *quit;
380
jump_list *not_found = NULL;
381
vector_compare_type compare_type = vector_compare_match1;
382
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
383
sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
384
sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
385
sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
386
sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
387
sljit_u32 bit = 0;
388
int i;
389
390
if (char1 != char2)
391
{
392
bit = char1 ^ char2;
393
compare_type = vector_compare_match1i;
394
395
if (!is_powerof2(bit))
396
{
397
bit = 0;
398
compare_type = vector_compare_match2;
399
}
400
}
401
402
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
403
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
404
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
405
406
/* First part (unaligned start) */
407
408
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
409
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
410
411
if (char1 != char2)
412
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
413
414
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
415
416
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
417
418
if (char1 != char2)
419
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
420
421
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
422
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
423
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
424
425
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
426
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
427
428
for (i = 0; i < 4; i++)
429
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
430
431
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
432
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
433
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
434
435
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
436
437
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
438
439
/* Second part (aligned) */
440
start = LABEL();
441
442
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
443
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
444
445
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
446
447
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
448
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
449
450
for (i = 0; i < 4; i++)
451
fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
452
453
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
454
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
455
456
JUMPHERE(quit);
457
458
SLJIT_ASSERT(tmp1_reg_ind < 8);
459
/* BSF r32, r/m32 */
460
instruction[0] = 0x0f;
461
instruction[1] = 0xbc;
462
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
463
sljit_emit_op_custom(compiler, instruction, 3);
464
465
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
466
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
467
468
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
469
return not_found;
470
}
471
472
#ifndef _WIN64
473
474
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
475
476
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
477
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
478
{
479
DEFINE_COMPILER;
480
sljit_u8 instruction[8];
481
/* The AVX2 code path is currently disabled. */
482
/* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
483
sljit_s32 reg_type = SLJIT_SIMD_REG_128;
484
sljit_s32 value;
485
vector_compare_type compare1_type = vector_compare_match1;
486
vector_compare_type compare2_type = vector_compare_match1;
487
sljit_u32 bit1 = 0;
488
sljit_u32 bit2 = 0;
489
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
490
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
491
sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
492
sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
493
sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
494
sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
495
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4);
496
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5);
497
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6);
498
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG);
499
struct sljit_label *start;
500
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
501
struct sljit_label *restart;
502
#endif
503
struct sljit_jump *jump[2];
504
int i;
505
506
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0);
507
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
508
509
/* Initialize. */
510
if (common->match_end_ptr != 0)
511
{
512
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
513
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
514
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
515
516
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
517
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
518
}
519
520
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
521
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
522
523
if (char1a == char1b)
524
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
525
else
526
{
527
bit1 = char1a ^ char1b;
528
if (is_powerof2(bit1))
529
{
530
compare1_type = vector_compare_match1i;
531
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
532
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
533
}
534
else
535
{
536
compare1_type = vector_compare_match2;
537
bit1 = 0;
538
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
539
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
540
}
541
}
542
543
value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
544
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, TMP1, 0);
545
546
if (char1a != char1b)
547
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR4, 0, TMP2, 0);
548
549
if (char2a == char2b)
550
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
551
else
552
{
553
bit2 = char2a ^ char2b;
554
if (is_powerof2(bit2))
555
{
556
compare2_type = vector_compare_match1i;
557
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
558
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
559
}
560
else
561
{
562
compare2_type = vector_compare_match2;
563
bit2 = 0;
564
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
565
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
566
}
567
}
568
569
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR3, 0, TMP1, 0);
570
571
if (char2a != char2b)
572
sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR5, 0, TMP2, 0);
573
574
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
575
if (char1a != char1b)
576
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR4, SLJIT_VR4, 0);
577
578
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR3, SLJIT_VR3, 0);
579
if (char2a != char2b)
580
sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR5, SLJIT_VR5, 0);
581
582
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
583
restart = LABEL();
584
#endif
585
586
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
587
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
588
value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf;
589
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
590
591
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
592
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
593
594
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
595
596
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
597
jump[1] = JUMP(SLJIT_JUMP);
598
599
JUMPHERE(jump[0]);
600
601
if (reg_type == SLJIT_SIMD_REG_256)
602
{
603
if (diff != 16)
604
{
605
/* PSLLDQ ymm1, ymm2, imm8 */
606
instruction[0] = 0xc5;
607
instruction[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3));
608
instruction[2] = 0x73;
609
instruction[3] = 0xc0 | (7 << 3) | data1_ind;
610
instruction[4] = diff & 0xf;
611
sljit_emit_op_custom(compiler, instruction, 5);
612
}
613
614
instruction[0] = 0xc4;
615
instruction[1] = 0xe3;
616
if (diff < 16)
617
{
618
/* VINSERTI128 xmm1, xmm2, xmm3/m128 */
619
/* instruction[0] = 0xc4; */
620
/* instruction[1] = 0xe3; */
621
instruction[2] = (sljit_u8)(0x7d ^ (data2_ind << 3));
622
instruction[3] = 0x38;
623
SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7);
624
instruction[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
625
instruction[5] = (sljit_u8)(16 - diff);
626
instruction[6] = 1;
627
sljit_emit_op_custom(compiler, instruction, 7);
628
}
629
else
630
{
631
/* VPERM2I128 xmm1, xmm2, xmm3/m128 */
632
/* instruction[0] = 0xc4; */
633
/* instruction[1] = 0xe3; */
634
value = (diff == 16) ? data1_ind : data2_ind;
635
instruction[2] = (sljit_u8)(0x7d ^ (value << 3));
636
instruction[3] = 0x46;
637
instruction[4] = 0xc0 | (data2_ind << 3) | value;
638
instruction[5] = 0x08;
639
sljit_emit_op_custom(compiler, instruction, 6);
640
}
641
}
642
else
643
{
644
/* MOVDQA xmm1, xmm2/m128 */
645
instruction[0] = 0x66;
646
instruction[1] = 0x0f;
647
instruction[2] = 0x6f;
648
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
649
sljit_emit_op_custom(compiler, instruction, 4);
650
651
/* PSLLDQ xmm1, imm8 */
652
/* instruction[0] = 0x66; */
653
/* instruction[1] = 0x0f; */
654
instruction[2] = 0x73;
655
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
656
instruction[4] = diff;
657
sljit_emit_op_custom(compiler, instruction, 5);
658
}
659
660
JUMPHERE(jump[1]);
661
662
value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
663
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
664
665
for (i = 0; i < 4; i++)
666
{
667
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
668
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
669
}
670
671
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
672
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
673
674
/* Ignore matches before the first STR_PTR. */
675
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
676
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
677
678
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
679
680
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
681
682
/* Main loop. */
683
start = LABEL();
684
685
value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
686
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
687
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
688
689
value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
690
sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
691
sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
692
693
for (i = 0; i < 4; i++)
694
{
695
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
696
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
697
}
698
699
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
700
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
701
702
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
703
704
JUMPHERE(jump[0]);
705
706
SLJIT_ASSERT(tmp1_reg_ind < 8);
707
/* BSF r32, r/m32 */
708
instruction[0] = 0x0f;
709
instruction[1] = 0xbc;
710
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
711
sljit_emit_op_custom(compiler, instruction, 3);
712
713
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
714
715
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
716
717
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
718
if (common->utf)
719
{
720
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
721
722
jump[0] = jump_if_utf_char_start(compiler, TMP1);
723
724
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
725
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
726
727
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
728
729
JUMPHERE(jump[0]);
730
}
731
#endif
732
733
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
734
735
if (common->match_end_ptr != 0)
736
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
737
}
738
739
#endif /* !_WIN64 */
740
741
#undef SIMD_COMPARE_TYPE_INDEX
742
743
#endif /* SLJIT_CONFIG_X86 */
744
745
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
746
747
#include <arm_neon.h>
748
749
/* Packs up to four byte sized characters into one unsigned int, so they can
   be handed over as a single integer value. Which bits of x belong to
   c1..c4 depends on the byte order of the target. */
typedef union {
  unsigned int x;
  struct { unsigned char c1, c2, c3, c4; } c;
} int_char;
753
754
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
755
/* Returns non-zero when the code unit at s is a trailing unit of a
   multi-unit character: a UTF-8 continuation byte (10xxxxxx) in 8 bit mode,
   or a UTF-16 low surrogate (0xdc00-0xdfff) in 16 bit mode. */
static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return (*s & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}
765
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
766
767
/* Code unit width dependent aliases for the NEON types and intrinsics.

   VECTOR_FACTOR  number of code units in one 128 bit vector
   vect_t         vector type holding VECTOR_FACTOR code units
   VLD1Q / VST1Q  vector load / store
   VCEQQ          element-wise equality compare
   VORRQ / VANDQ  bitwise or / and
   VDUPQ          broadcast of a scalar into every element
   VEXTQ          vector extract
   quad_word      view of 128 bits as code units (mem) or as two 64 bit
                  integers (dw) */
#if PCRE2_CODE_UNIT_WIDTH == 8
# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;
#else
/* Any other width is handled as 32 bit code units. */
# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;
#endif
810
811
/* pcre2_jit_neon_inc.h is used as a template: it is included once for each
   search variant, selected by the FFCS / FFCS_2 / FFCS_MASK macro, and once
   more with FF_UTF also defined when 8 or 16 bit UTF support is compiled in.
   NOTE(review): presumably these inclusions define the ffcs, ffcs_2 and
   ffcs_mask helpers and their *_utf counterparts, which are referenced by
   fast_forward_char_simd() later in this file -- confirm in the header. */

/* Variant selected by FFCS. */
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS

/* Variant selected by FFCS_2. */
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2

/* Variant selected by FFCS_MASK. */
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK
837
838
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
839
840
/* ARM64 (NEON) single character fast-forward.

Emits JIT code which stores STR_PTR and TMP3 in LOCAL0 / LOCAL1, calls one of
the ffcs* helper functions and reloads both registers afterwards. The helper
receives STR_END in R0, the address of LOCAL0 in R1 and offset in R2; the
packed character data is loaded into R4.

Arguments:
  common   compiler state
  char1    first accepted character
  char2    second accepted character (may be equal to char1)
  offset   passed to the helper; the *_utf helpers are used only when
           common->utf is set and offset is greater than zero

A zero return value from the helper is treated as "not found": in
PCRE2_JIT_COMPLETE mode the code jumps to failed_match, otherwise STR_PTR
is limited to STR_END. Any other return value becomes the new STR_PTR. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit, *quit;
sljit_sw helper;
PCRE2_UCHAR mask = char1 ^ char2;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
int utf_variant = common->utf && offset > 0;
#endif

/* Save temporary registers. */
SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0);

/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);

/* Select the character data and the helper. mask is zero exactly when
char1 and char2 are the same character. */
if (mask == 0)
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  helper = SLJIT_FUNC_ADDR(ffcs);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant)
    helper = SLJIT_FUNC_ADDR(ffcs_utf);
#endif
  }
else if (is_powerof2(mask))
  {
  /* The characters differ in one bit only: pass the character with that
  bit set together with the bit itself. */
  ic.c.c1 = char1 | mask;
  ic.c.c2 = mask;
  helper = SLJIT_FUNC_ADDR(ffcs_mask);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant)
    helper = SLJIT_FUNC_ADDR(ffcs_mask_utf);
#endif
  }
else
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  helper = SLJIT_FUNC_ADDR(ffcs_2);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant)
    helper = SLJIT_FUNC_ADDR(ffcs_2_utf);
#endif
  }

OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
                 SLJIT_IMM, helper);

/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, &common->failed_match, partial_quit);

  /* Fast forward STR_PTR to the result of memchr. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
  return;
  }

/* Fast forward STR_PTR to the result of memchr. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

/* Partial matching: a zero return value does not fail the match, only
STR_PTR is limited to STR_END. */
quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
JUMPHERE(partial_quit);
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
JUMPHERE(quit);
}
933
934
/* Selects how a data vector is compared against the cmp1 / cmp2 vectors. */
typedef enum {
  /* Equality with cmp1 only. */
  compare_match1 = 0,
  /* cmp2 is OR-ed into the data before the equality test with cmp1. */
  compare_match1i = 1,
  /* Equality with cmp1 or with cmp2. */
  compare_match2 = 2
} compare_type;
939
940
/* Compares the lanes of dst according to ctype and returns the lane-wise
result of the VCEQQ comparison(s).

  compare_match2    (dst == cmp1) OR (dst == cmp2)
  compare_match1i   (dst OR cmp2) == cmp1
  anything else     dst == cmp1 */
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
switch (ctype)
  {
  case compare_match2:
  return VORRQ(VCEQQ(dst, cmp1), VCEQQ(dst, cmp2));

  case compare_match1i:
  return VCEQQ(VORRQ(dst, cmp2), cmp1);

  default:
  return VCEQQ(dst, cmp1);
  }
}
956
957
/* Returns the largest supported distance (in code units) between the two
characters of a character pair search: 15, 7 or 3 for 8, 16 or 32 bit code
units respectively. */
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH != 8 && PCRE2_CODE_UNIT_WIDTH != 16 && PCRE2_CODE_UNIT_WIDTH != 32
#error "Unsupported unit width"
#endif
/* 128 / 8 - 1 == 15, 128 / 16 - 1 == 7, 128 / 32 - 1 == 3 */
return (128 / PCRE2_CODE_UNIT_WIDTH) - 1;
}
969
970
/* ARM doesn't have a shift left across lanes. */
971
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
972
{
973
vect_t zero = VDUPQ(0);
974
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
975
/* VEXTQ takes an immediate as last argument. */
976
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
977
switch (n)
978
{
979
C(1); C(2); C(3);
980
#if PCRE2_CODE_UNIT_WIDTH != 32
981
C(4); C(5); C(6); C(7);
982
# if PCRE2_CODE_UNIT_WIDTH != 16
983
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
984
# endif
985
#endif
986
default:
987
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
988
happen. The return is still here for compilers to not warn. */
989
return a;
990
}
991
}
992
993
#define FFCPS
994
#define FFCPS_DIFF1
995
#define FFCPS_CHAR1A2A
996
997
#define FFCPS_0
998
#include "pcre2_jit_neon_inc.h"
999
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1000
# define FF_UTF
1001
# include "pcre2_jit_neon_inc.h"
1002
# undef FF_UTF
1003
#endif
1004
#undef FFCPS_0
1005
1006
#undef FFCPS_CHAR1A2A
1007
1008
#define FFCPS_1
1009
#include "pcre2_jit_neon_inc.h"
1010
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1011
# define FF_UTF
1012
# include "pcre2_jit_neon_inc.h"
1013
# undef FF_UTF
1014
#endif
1015
#undef FFCPS_1
1016
1017
#undef FFCPS_DIFF1
1018
1019
#define FFCPS_DEFAULT
1020
#include "pcre2_jit_neon_inc.h"
1021
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1022
# define FF_UTF
1023
# include "pcre2_jit_neon_inc.h"
1024
# undef FF_UTF
1025
#endif
1026
#undef FFCPS
1027
1028
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1029
1030
/* ARM64 (NEON) character pair fast-forward.

Emits JIT code which stores STR_PTR in LOCAL0, calls one of the ffcps*
helper functions and reloads STR_PTR afterwards. The helper receives the
end pointer in R0, the address of LOCAL0 in R1, offs1 in R2 and offs2 in R3;
the four packed characters are loaded into R4.

Arguments:
  common           compiler state (must be in PCRE2_JIT_COMPLETE mode)
  offs1            offset of the first character, must be > offs2
  char1a, char1b   accepted characters at offs1
  offs2            offset of the second character
  char2a, char2b   accepted characters at offs2

A zero return value makes the generated code jump to failed_match, any
other return value becomes the new STR_PTR. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *partial_quit;
int_char ic;
sljit_sw helper;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);

/* Select the helper: dedicated variants exist for a distance of one code
unit, with or without case variants of the characters. */
if (diff != 1)
  {
  helper = SLJIT_FUNC_ADDR(ffcps_default);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    helper = SLJIT_FUNC_ADDR(ffcps_default_utf);
#endif
  }
else if (char1a == char1b && char2a == char2b)
  {
  helper = SLJIT_FUNC_ADDR(ffcps_0);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    helper = SLJIT_FUNC_ADDR(ffcps_0_utf);
#endif
  }
else
  {
  helper = SLJIT_FUNC_ADDR(ffcps_1);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
    helper = SLJIT_FUNC_ADDR(ffcps_1_utf);
#endif
  }

ic.c.c1 = char1a;
ic.c.c2 = char1b;
ic.c.c3 = char2a;
ic.c.c4 = char2b;

/* Save temporary register STR_PTR. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);

/* Prepare arguments for the function call. */
if (common->match_end_ptr != 0)
  {
  /* R0 = min(STR_END, saved match end + IN_UCHARS(offs1 + 1)) */
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
  SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
  }
else
  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);

GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);

sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
                 SLJIT_IMM, helper);

/* Restore STR_PTR register. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the result of memchr. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

JUMPHERE(partial_quit);
}
1108
1109
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
1110
1111
#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
1112
1113
/* VECTOR_ELEMENT_SIZE is 0, 1 or 2 for 8, 16 or 32 bit code units. The
s390x code in this section shifts it into bits 12-15 of the last halfword of
the vector instructions which operate on code unit sized elements. */
#if PCRE2_CODE_UNIT_WIDTH == 8
1114
#define VECTOR_ELEMENT_SIZE 0
1115
#elif PCRE2_CODE_UNIT_WIDTH == 16
1116
#define VECTOR_ELEMENT_SIZE 1
1117
#elif PCRE2_CODE_UNIT_WIDTH == 32
1118
#define VECTOR_ELEMENT_SIZE 2
1119
#else
1120
#error "Unsupported unit width"
1121
#endif
1122
1123
/* Emits a 6 byte vector load instruction.

Arguments:
  compiler    sljit compiler
  vlbb        selects the opcode byte: 0x07 when TRUE, 0x06 when FALSE
              (NOTE(review): presumably VLBB and VL - confirm against the
              instruction set reference)
  dst_vreg    destination vector register field
  base_reg    base register field of the address
  index_reg   index register field of the address */
static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
  sljit_s32 base_reg, sljit_s32 index_reg)
{
sljit_u16 opcode = vlbb ? 0x07 : 0x06;
sljit_u16 instruction[3];

instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
instruction[1] = (sljit_u16)(base_reg << 12);
instruction[2] = (sljit_u16)((0x8 << 8) | opcode);

sljit_emit_op_custom(compiler, instruction, 6);
}
1134
1135
#if PCRE2_CODE_UNIT_WIDTH == 32
1136
1137
static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
1138
PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
1139
{
1140
sljit_u16 instruction[3];
1141
1142
SLJIT_ASSERT(step >= 0 && step <= 1);
1143
1144
if (chr < 0x7fff)
1145
{
1146
if (step == 1)
1147
return;
1148
1149
/* VREPI */
1150
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
1151
instruction[1] = (sljit_u16)chr;
1152
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1153
sljit_emit_op_custom(compiler, instruction, 6);
1154
return;
1155
}
1156
1157
if (step == 0)
1158
{
1159
OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
1160
1161
/* VLVG */
1162
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
1163
instruction[1] = 0;
1164
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
1165
sljit_emit_op_custom(compiler, instruction, 6);
1166
return;
1167
}
1168
1169
/* VREP */
1170
instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
1171
instruction[1] = 0;
1172
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
1173
sljit_emit_op_custom(compiler, instruction, 6);
1174
}
1175
1176
#endif
1177
1178
/* Emits at most one instruction of a vector compare sequence. The caller
runs the steps 0, 1, 2 in order; which step emits what depends on
compare_type:

  step 1, any type             VCEQ  dst = (dst == cmp1)
  step 0, vector_compare_match1i  VO    dst = dst | cmp2
  step 0, vector_compare_match2   VCEQ  tmp = (dst == cmp2)
  step 2, vector_compare_match2   VO    dst = dst | tmp

Every other combination emits nothing.

Arguments:
  compiler       sljit compiler
  compare_type   kind of comparison
  step           0, 1 or 2
  dst_ind        vector register holding the data and receiving the result
  cmp1_ind       vector register compared in step 1
  cmp2_ind       second compare / mask vector register
  tmp_ind        scratch vector register (used by vector_compare_match2) */
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u16 instruction[3];
sljit_s32 result_ind = dst_ind;
sljit_s32 operand_ind;
int emit_compare;

SLJIT_ASSERT(step >= 0 && step <= 2);

if (step == 1)
  {
  /* VCEQ dst, dst, cmp1 */
  emit_compare = 1;
  operand_ind = cmp1_ind;
  }
else if (compare_type != vector_compare_match2)
  {
  if (step != 0 || compare_type != vector_compare_match1i)
    return;

  /* VO dst, dst, cmp2 */
  emit_compare = 0;
  operand_ind = cmp2_ind;
  }
else if (step == 0)
  {
  /* VCEQ tmp, dst, cmp2 */
  emit_compare = 1;
  result_ind = tmp_ind;
  operand_ind = cmp2_ind;
  }
else if (step == 2)
  {
  /* VO dst, dst, tmp */
  emit_compare = 0;
  operand_ind = tmp_ind;
  }
else
  return;

instruction[0] = (sljit_u16)(0xe700 | (result_ind << 4) | dst_ind);
instruction[1] = (sljit_u16)(operand_ind << 12);

if (emit_compare)
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
else
  instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);

sljit_emit_op_custom(compiler, instruction, 6);
}
1227
1228
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
1229
1230
/* s390x single character fast-forward.

Emits JIT code which scans the subject from STR_PTR in 16 byte vector steps
for char1 or char2 and leaves STR_PTR at the position found.

Arguments:
  common   compiler state
  char1    first accepted character
  char2    second accepted character (may be equal to char1)
  offset   only used in UTF mode (8 / 16 bit code units): when it is greater
           than zero, the code unit at STR_PTR - offset must be the start of
           a character, otherwise the search is restarted one code unit later

Three compare strategies are used:
  char1 == char2                  a plain equality search
  the characters differ in 1 bit  that bit is OR-ed into the data first
  otherwise                       two equality compares are OR-ed together

When the end of the subject is reached, the code jumps to failed_match in
PCRE2_JIT_COMPLETE mode, otherwise STR_PTR is limited to STR_END. TMP1 and
TMP2 are overwritten. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register assignment. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;

SLJIT_UNUSED_ARG(offset);

if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

/* Together with the AND below: TMP2 = (STR_PTR + 16) & ~15 */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1 != char2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
  /* instruction[2] still holds the VREPI encoding stored above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* The function scope counter is reused here; declaring a new one in the
loop header would shadow it. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);

  if (char1 != char2)
    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
  }

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

if (compare_type == vector_compare_match2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VLGVB: copy byte element 7 of the search result into TMP1. */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

/* The search is finished when the advanced STR_PTR is below TMP2. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* The flags come from the custom instruction above; the loop is repeated
while the SLJIT_OVERFLOW condition holds. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

JUMPHERE(quit);

if (common->mode != PCRE2_JIT_COMPLETE)
  {
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  /* Limit STR_PTR to STR_END. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
  SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
  }
else
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: continue the search from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif
}
1428
1429
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
1430
1431
/* s390x requested character search.

Emits JIT code which scans the subject from TMP1 in 16 byte vector steps
for char1 or char2. TMP1 is advanced to the position found.

Arguments:
  common   compiler state
  char1    first accepted character
  char2    second accepted character (may be equal to char1)

Returns the list of jumps which are taken when TMP1 reaches STR_END, i.e.
when the character was not found. TMP2 and TMP3 are overwritten. */
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
/* Vector register assignment. */
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_s32 zero_ind = 4;
sljit_u32 bit = 0;
int i;

if (char1 != char2)
  {
  bit = char1 ^ char2;
  compare_type = vector_compare_match1i;

  if (!is_powerof2(bit))
    {
    bit = 0;
    compare_type = vector_compare_match2;
    }
  }

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

/* First part (unaligned start) */

/* Together with the AND below: TMP2 = (TMP1 + 16) & ~15 */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
instruction[1] = (sljit_u16)(char1 | bit);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1 != char2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
  /* instruction[2] still holds the VREPI encoding stored above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* The function scope counter is reused here; declaring a new one in the
loop header would shadow it. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);

  if (char1 != char2)
    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
  }

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

if (compare_type == vector_compare_match2)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
  instruction[1] = 0;
  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VLGVB: copy byte element 7 of the search result into TMP3. */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

/* The search is finished when the advanced TMP1 is below TMP2. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);

OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);

/* Second part (aligned) */
start = LABEL();

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);

add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);

if (compare_type != vector_compare_match2)
  {
  if (compare_type == vector_compare_match1i)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFEE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
  sljit_emit_op_custom(compiler, instruction, 6);
  }
else
  {
  for (i = 0; i < 3; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* VFENE */
  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* The flags come from the custom instruction above; the loop is repeated
while the SLJIT_OVERFLOW condition holds. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);

JUMPHERE(quit);
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

return not_found;
}
1590
1591
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1592
1593
/* s390x character pair fast-forward.

Emits JIT code which searches for a position where one of char1a / char1b
is found at offs1 and one of char2a / char2b is found at offs2 at the same
time. Two vectors are loaded in each step (one from STR_PTR, one from
STR_PTR + diff where diff is the negative byte distance between the two
offsets), both are compared and the results are AND-ed together.

Arguments:
  common           compiler state (must be in PCRE2_JIT_COMPLETE mode)
  offs1            offset of the first character, must be > offs2
  char1a, char1b   accepted characters at offs1
  offs2            offset of the second character
  char2a, char2b   accepted characters at offs2

STR_PTR is temporarily increased by IN_UCHARS(offs1) for the search and
decreased again before returning. When common->match_end_ptr is non-zero,
STR_END is temporarily lowered (its original value is kept in TMP3). The
code jumps to failed_match when the end is reached. TMP1 and TMP2 are
overwritten. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *jump[2];
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
/* Negative, because offs1 > offs2. */
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
/* Vector register assignment. */
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
sljit_s32 zero_ind = 8;
int i;

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);

if (char1a != char1b)
  {
  bit1 = char1a ^ char1b;
  compare1_type = vector_compare_match1i;

  if (!is_powerof2(bit1))
    {
    bit1 = 0;
    compare1_type = vector_compare_match2;
    }
  }

if (char2a != char2b)
  {
  bit2 = char2a ^ char2b;
  compare2_type = vector_compare_match1i;

  if (!is_powerof2(bit2))
    {
    bit2 = 0;
    compare2_type = vector_compare_match2;
    }
  }

/* Initialize. */
if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, saved match end + IN_UCHARS(offs1 + 1)); the
  original STR_END is kept in TMP3 and restored at the end. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
  SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);

#if PCRE2_CODE_UNIT_WIDTH != 32

/* TMP1 = address of the second character */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
instruction[1] = (sljit_u16)(char1a | bit1);
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

if (char1a != char1b)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
  instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
  /* instruction[2] still holds the VREPI encoding stored above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
instruction[1] = (sljit_u16)(char2a | bit2);
/* instruction[2] still holds the VREPI encoding stored above. */
sljit_emit_op_custom(compiler, instruction, 6);

if (char2a != char2b)
  {
  /* VREPI */
  instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
  instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
  /* instruction[2] still holds the VREPI encoding stored above. */
  sljit_emit_op_custom(compiler, instruction, 6);
  }

#else /* PCRE2_CODE_UNIT_WIDTH == 32 */

/* The function scope counter is reused here; declaring a new one in the
loop header would shadow it. */
for (i = 0; i < 2; i++)
  {
  replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);

  if (char1a != char1b)
    replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);

  replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);

  if (char2a != char2b)
    replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
  }

/* TMP1 is a temporary of the loop above, so it is set afterwards. */
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);

#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */

/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
instruction[1] = 0;
instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
sljit_emit_op_custom(compiler, instruction, 6);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif

/* The second vector is loaded with the vlbb form unless TMP1 is below
TMP2 (STR_PTR & ~15), in which case the other load form is used. */
jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
jump[1] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
JUMPHERE(jump[1]);

load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);

for (i = 0; i < 3; i++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  }

/* VN */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);

/* VFENE */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);

/* VLGVB: copy byte element 7 of the search result into TMP1. */
instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

/* The search is finished when the advanced STR_PTR is below TMP2. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
/* TMP1 is the index register of the second load in the main loop. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);

/* Main loop. */
start = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);

for (i = 0; i < 3; i++)
  {
  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
  }

/* VN */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)(data2_ind << 12);
instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
sljit_emit_op_custom(compiler, instruction, 6);

/* VFENE */
instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
sljit_emit_op_custom(compiler, instruction, 6);

/* The flags come from the custom instruction above; the loop is repeated
while the SLJIT_OVERFLOW condition holds. */
sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
JUMPTO(SLJIT_OVERFLOW, start);

/* VLGVB */
instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
instruction[1] = 7;
instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
sljit_emit_op_custom(compiler, instruction, 6);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

JUMPHERE(quit);

add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

  quit = jump_if_utf_char_start(compiler, TMP1);

  /* Not a character start: continue the search from the next code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  /* Recompute the values expected at the restart label:
  TMP2 = STR_PTR & ~15 and TMP1 = address of the second character. */
  OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(quit);
  }
#endif

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
1837
1838
#endif /* SLJIT_CONFIG_S390X */
1839
1840
#if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
1841
1842
/* Runtime detection of the LSX vector extension. Under Linux the hardware
capability word is read with getauxval(AT_HWCAP) and tested against the LSX
bit; on every other system LSX is reported as unavailable. */
#if defined(__linux__)
#include <sys/auxv.h>
#define LOONGARCH_HWCAP_LSX (1 << 4)
#define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
#else /* !__linux__ */
#define HAS_LSX_SUPPORT 0
#endif /* __linux__ */
1850
1851
/* Declares sljit_u32 as another name for the type sljit_ins, which therefore
has to be declared before this point.
NOTE(review): the direction looks inverted - one would expect the instruction
word type sljit_ins to be introduced in terms of sljit_u32. Presumably both
names already exist and denote the same type, making this a harmless
re-declaration; confirm against the SLJIT LoongArch backend. */
typedef sljit_ins sljit_u32;
1852
1853
/* Bit masks of the immediate fields of an instruction word. Every immediate
starts at bit 10; the masks cover 12, 5 and 2 bits respectively. */
#define SI12_IMM_MASK 0x003ffc00
#define UI5_IMM_MASK 0x00007c00
#define UI2_IMM_MASK 0x00000c00

/* Register operand fields. VD / VJ / VK place a vector register index at bit
0, 5 and 10; RD_V / RJ_V place a general purpose register index at bit 0 and 5.
The parameter is parenthesized so that an expression argument (for example
VJ(a | b)) is converted and shifted as a whole. */
#define VD(vd) ((sljit_ins)(vd) << 0)
#define VJ(vj) ((sljit_ins)(vj) << 5)
#define VK(vk) ((sljit_ins)(vk) << 10)
#define RD_V(rd) ((sljit_ins)(rd) << 0)
#define RJ_V(rj) ((sljit_ins)(rj) << 5)

/* Immediate operand fields: the value is moved to bit 10 and truncated to the
width of the field, so negative values end up in two's complement form. */
#define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
#define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
#define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)
1866
1867
/* LSX opcodes.

These are base instruction words; the register and immediate operand fields
are OR-ed in where the instruction is emitted. */
#define VLD           0x2c000000
#define VOR_V         0x71268000
#define VAND_V        0x71260000
#define VBSLL_V       0x728e0000
#define VMSKLTZ_B     0x729c4000
#define VPICKVE2GR_WU 0x72f3e000

/* Opcodes that depend on the code unit width: 8-bit code units select the
first pair, 16-bit code units the second one and any other width the third
(presumably the .B, .H and .W forms of the two instructions). */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VREPLGR2VR    0x729f0000
#define VSEQ          0x70000000
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define VREPLGR2VR    0x729f0400
#define VSEQ          0x70008000
#else
#define VREPLGR2VR    0x729f0800
#define VSEQ          0x70010000
#endif
1885
1886
/* Emits the LSX instructions which replace the data held in vector register
dst_ind with the result of comparing it against the searched character(s).

  compare_type  selects the comparison:
                  vector_compare_match1   dst == cmp1
                  vector_compare_match1i  (dst | cmp2) == cmp1
                  vector_compare_match2   (dst == cmp1) | (dst == cmp2)
  dst_ind       vector register with the data; receives the result
  cmp1_ind      vector register with the first comparison operand
  cmp2_ind      vector register with the second comparison operand
                (not referenced for vector_compare_match1)
  tmp_ind       scratch vector register, only written for
                vector_compare_match2 */
static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
  sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
if (compare_type == vector_compare_match2)
  {
  /* Two independent comparisons are needed, so the data is duplicated first.
  VBSLL.V vd, vj, ui5 - the shift amount is 0, presumably a plain copy. */
  push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));

  /* VSEQ.B/H/W vd, vj, vk - dst: data compared with the first operand. */
  push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));

  /* VSEQ.B/H/W vd, vj, vk - tmp: copy compared with the second operand. */
  push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));

  /* VOR.V vd, vj, vk - either comparison may succeed. */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
  return;
  }

if (compare_type == vector_compare_match1i)
  {
  /* Merge the second operand into the data before the single comparison.
  VOR.V vd, vj, vk */
  push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
  }

/* VSEQ.B/H/W vd, vj, vk */
push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
}
1915
1916
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
1917
1918
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
1919
{
1920
DEFINE_COMPILER;
1921
struct sljit_label *start;
1922
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1923
struct sljit_label *restart;
1924
#endif
1925
struct sljit_jump *quit;
1926
struct sljit_jump *partial_quit[2];
1927
vector_compare_type compare_type = vector_compare_match1;
1928
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
1929
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
1930
sljit_s32 data_ind = 0;
1931
sljit_s32 tmp_ind = 1;
1932
sljit_s32 cmp1_ind = 2;
1933
sljit_s32 cmp2_ind = 3;
1934
sljit_u32 bit = 0;
1935
1936
SLJIT_UNUSED_ARG(offset);
1937
1938
if (char1 != char2)
1939
{
1940
bit = char1 ^ char2;
1941
compare_type = vector_compare_match1i;
1942
1943
if (!is_powerof2(bit))
1944
{
1945
bit = 0;
1946
compare_type = vector_compare_match2;
1947
}
1948
}
1949
1950
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1951
if (common->mode == PCRE2_JIT_COMPLETE)
1952
add_jump(compiler, &common->failed_match, partial_quit[0]);
1953
1954
/* First part (unaligned start) */
1955
1956
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
1957
1958
/* VREPLGR2VR.B/H/W vd, rj */
1959
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
1960
1961
if (char1 != char2)
1962
{
1963
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
1964
1965
/* VREPLGR2VR.B/H/W vd, rj */
1966
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
1967
}
1968
1969
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
1970
1971
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1972
restart = LABEL();
1973
#endif
1974
1975
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
1976
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1977
1978
/* VLD vd, rj, si12 */
1979
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
1980
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1981
1982
/* VMSKLTZ.B vd, vj */
1983
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
1984
1985
/* VPICKVE2GR.WU rd, vj, ui2 */
1986
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
1987
1988
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1989
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
1990
1991
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
1992
1993
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1994
1995
/* Second part (aligned) */
1996
start = LABEL();
1997
1998
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
1999
2000
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2001
if (common->mode == PCRE2_JIT_COMPLETE)
2002
add_jump(compiler, &common->failed_match, partial_quit[1]);
2003
2004
/* VLD vd, rj, si12 */
2005
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2006
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2007
2008
/* VMSKLTZ.B vd, vj */
2009
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2010
2011
/* VPICKVE2GR.WU rd, vj, ui2 */
2012
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2013
2014
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2015
2016
JUMPHERE(quit);
2017
2018
/* CTZ.W rd, rj */
2019
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2020
2021
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2022
2023
if (common->mode != PCRE2_JIT_COMPLETE)
2024
{
2025
JUMPHERE(partial_quit[0]);
2026
JUMPHERE(partial_quit[1]);
2027
OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
2028
SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
2029
}
2030
else
2031
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2032
2033
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2034
if (common->utf && offset > 0)
2035
{
2036
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
2037
2038
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
2039
2040
quit = jump_if_utf_char_start(compiler, TMP1);
2041
2042
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2043
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2044
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
2045
JUMPTO(SLJIT_JUMP, restart);
2046
2047
JUMPHERE(quit);
2048
}
2049
#endif
2050
}
2051
2052
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
2053
2054
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
2055
{
2056
DEFINE_COMPILER;
2057
struct sljit_label *start;
2058
struct sljit_jump *quit;
2059
jump_list *not_found = NULL;
2060
vector_compare_type compare_type = vector_compare_match1;
2061
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
2062
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
2063
sljit_s32 data_ind = 0;
2064
sljit_s32 tmp_ind = 1;
2065
sljit_s32 cmp1_ind = 2;
2066
sljit_s32 cmp2_ind = 3;
2067
sljit_u32 bit = 0;
2068
2069
if (char1 != char2)
2070
{
2071
bit = char1 ^ char2;
2072
compare_type = vector_compare_match1i;
2073
2074
if (!is_powerof2(bit))
2075
{
2076
bit = 0;
2077
compare_type = vector_compare_match2;
2078
}
2079
}
2080
2081
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
2082
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
2083
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2084
2085
/* First part (unaligned start) */
2086
2087
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
2088
2089
/* VREPLGR2VR vd, rj */
2090
push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
2091
2092
if (char1 != char2)
2093
{
2094
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
2095
/* VREPLGR2VR vd, rj */
2096
push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
2097
}
2098
2099
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
2100
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
2101
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2102
2103
/* VLD vd, rj, si12 */
2104
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2105
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2106
2107
/* VMSKLTZ.B vd, vj */
2108
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2109
2110
/* VPICKVE2GR.WU rd, vj, ui2 */
2111
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2112
2113
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2114
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
2115
2116
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
2117
2118
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2119
2120
/* Second part (aligned) */
2121
start = LABEL();
2122
2123
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
2124
2125
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2126
2127
/* VLD vd, rj, si12 */
2128
push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2129
fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2130
2131
/* VMSKLTZ.B vd, vj */
2132
push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2133
2134
/* VPICKVE2GR.WU rd, vj, ui2 */
2135
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2136
2137
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2138
2139
JUMPHERE(quit);
2140
2141
/* CTZ.W rd, rj */
2142
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2143
2144
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
2145
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
2146
2147
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2148
return not_found;
2149
}
2150
2151
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
2152
2153
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
2154
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
2155
{
2156
DEFINE_COMPILER;
2157
vector_compare_type compare1_type = vector_compare_match1;
2158
vector_compare_type compare2_type = vector_compare_match1;
2159
sljit_u32 bit1 = 0;
2160
sljit_u32 bit2 = 0;
2161
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
2162
sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
2163
sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
2164
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
2165
sljit_s32 data1_ind = 0;
2166
sljit_s32 data2_ind = 1;
2167
sljit_s32 tmp1_ind = 2;
2168
sljit_s32 tmp2_ind = 3;
2169
sljit_s32 cmp1a_ind = 4;
2170
sljit_s32 cmp1b_ind = 5;
2171
sljit_s32 cmp2a_ind = 6;
2172
sljit_s32 cmp2b_ind = 7;
2173
struct sljit_label *start;
2174
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2175
struct sljit_label *restart;
2176
#endif
2177
struct sljit_jump *jump[2];
2178
2179
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
2180
SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
2181
2182
/* Initialize. */
2183
if (common->match_end_ptr != 0)
2184
{
2185
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
2186
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
2187
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2188
2189
OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
2190
SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
2191
}
2192
2193
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
2194
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2195
2196
if (char1a == char1b)
2197
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
2198
else
2199
{
2200
bit1 = char1a ^ char1b;
2201
if (is_powerof2(bit1))
2202
{
2203
compare1_type = vector_compare_match1i;
2204
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
2205
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
2206
}
2207
else
2208
{
2209
compare1_type = vector_compare_match2;
2210
bit1 = 0;
2211
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
2212
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
2213
}
2214
}
2215
2216
/* VREPLGR2VR vd, rj */
2217
push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));
2218
2219
if (char1a != char1b)
2220
{
2221
/* VREPLGR2VR vd, rj */
2222
push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
2223
}
2224
2225
if (char2a == char2b)
2226
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
2227
else
2228
{
2229
bit2 = char2a ^ char2b;
2230
if (is_powerof2(bit2))
2231
{
2232
compare2_type = vector_compare_match1i;
2233
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
2234
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
2235
}
2236
else
2237
{
2238
compare2_type = vector_compare_match2;
2239
bit2 = 0;
2240
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
2241
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
2242
}
2243
}
2244
2245
/* VREPLGR2VR vd, rj */
2246
push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));
2247
2248
if (char2a != char2b)
2249
{
2250
/* VREPLGR2VR vd, rj */
2251
push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
2252
}
2253
2254
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2255
restart = LABEL();
2256
#endif
2257
2258
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
2259
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
2260
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
2261
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2262
2263
/* VLD vd, rj, si12 */
2264
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2265
2266
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
2267
2268
/* VLD vd, rj, si12 */
2269
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
2270
jump[1] = JUMP(SLJIT_JUMP);
2271
2272
JUMPHERE(jump[0]);
2273
2274
/* VBSLL.V vd, vj, ui5 */
2275
push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));
2276
2277
JUMPHERE(jump[1]);
2278
2279
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
2280
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
2281
2282
/* VAND vd, vj, vk */
2283
push_inst(compiler, VOR_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
2284
2285
/* VMSKLTZ.B vd, vj */
2286
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
2287
2288
/* VPICKVE2GR.WU rd, vj, ui2 */
2289
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2290
2291
/* Ignore matches before the first STR_PTR. */
2292
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2293
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
2294
2295
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
2296
2297
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2298
2299
/* Main loop. */
2300
start = LABEL();
2301
2302
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
2303
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2304
2305
/* VLD vd, rj, si12 */
2306
push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2307
push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
2308
2309
fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
2310
fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
2311
2312
/* VAND.V vd, vj, vk */
2313
push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
2314
2315
/* VMSKLTZ.B vd, vj */
2316
push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
2317
2318
/* VPICKVE2GR.WU rd, vj, ui2 */
2319
push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2320
2321
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2322
2323
JUMPHERE(jump[0]);
2324
2325
/* CTZ.W rd, rj */
2326
push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2327
2328
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2329
2330
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2331
2332
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2333
if (common->utf)
2334
{
2335
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
2336
2337
jump[0] = jump_if_utf_char_start(compiler, TMP1);
2338
2339
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2340
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
2341
2342
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
2343
2344
JUMPHERE(jump[0]);
2345
}
2346
#endif
2347
2348
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
2349
2350
if (common->match_end_ptr != 0)
2351
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2352
}
2353
2354
#endif /* SLJIT_CONFIG_LOONGARCH_64 */
2355
2356
#endif /* !SUPPORT_VALGRIND */
2357
2358