Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/jdk17u
Path: blob/master/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
64441 views
1
/*
2
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*
23
*/
24
25
#include "precompiled.hpp"
26
#include "asm/assembler.hpp"
27
#include "asm/assembler.inline.hpp"
28
#include "opto/c2_MacroAssembler.hpp"
29
#include "opto/intrinsicnode.hpp"
30
#include "opto/subnode.hpp"
31
#include "runtime/stubRoutines.hpp"
32
33
#ifdef PRODUCT
34
#define BLOCK_COMMENT(str) /* nothing */
35
#define STOP(error) stop(error)
36
#else
37
#define BLOCK_COMMENT(str) block_comment(str)
38
#define STOP(error) block_comment(error); stop(error)
39
#endif
40
41
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
42
43
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
44
45
// Search for str1 in str2 and return index or -1
46
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
47
Register cnt2, Register cnt1,
48
Register tmp1, Register tmp2,
49
Register tmp3, Register tmp4,
50
Register tmp5, Register tmp6,
51
int icnt1, Register result, int ae) {
52
// NOTE: tmp5, tmp6 can be zr depending on specific method version
53
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
54
55
Register ch1 = rscratch1;
56
Register ch2 = rscratch2;
57
Register cnt1tmp = tmp1;
58
Register cnt2tmp = tmp2;
59
Register cnt1_neg = cnt1;
60
Register cnt2_neg = cnt2;
61
Register result_tmp = tmp4;
62
63
bool isL = ae == StrIntrinsicNode::LL;
64
65
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
66
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
67
int str1_chr_shift = str1_isL ? 0:1;
68
int str2_chr_shift = str2_isL ? 0:1;
69
int str1_chr_size = str1_isL ? 1:2;
70
int str2_chr_size = str2_isL ? 1:2;
71
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
72
(chr_insn)&MacroAssembler::ldrh;
73
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
74
(chr_insn)&MacroAssembler::ldrh;
75
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
76
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
77
78
// Note, inline_string_indexOf() generates checks:
79
// if (substr.count > string.count) return -1;
80
// if (substr.count == 0) return 0;
81
82
// We have two strings, a source string in str2, cnt2 and a pattern string
83
// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
84
85
// For larger pattern and source we use a simplified Boyer Moore algorithm.
86
// With a small pattern and source we use linear scan.
87
88
if (icnt1 == -1) {
89
sub(result_tmp, cnt2, cnt1);
90
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
91
br(LT, LINEARSEARCH);
92
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
93
subs(zr, cnt1, 256);
94
lsr(tmp1, cnt2, 2);
95
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
96
br(GE, LINEARSTUB);
97
}
98
99
// The Boyer Moore alogorithm is based on the description here:-
100
//
101
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
102
//
103
// This describes and algorithm with 2 shift rules. The 'Bad Character' rule
104
// and the 'Good Suffix' rule.
105
//
106
// These rules are essentially heuristics for how far we can shift the
107
// pattern along the search string.
108
//
109
// The implementation here uses the 'Bad Character' rule only because of the
110
// complexity of initialisation for the 'Good Suffix' rule.
111
//
112
// This is also known as the Boyer-Moore-Horspool algorithm:-
113
//
114
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
115
//
116
// This particular implementation has few java-specific optimizations.
117
//
118
// #define ASIZE 256
119
//
120
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
121
// int i, j;
122
// unsigned c;
123
// unsigned char bc[ASIZE];
124
//
125
// /* Preprocessing */
126
// for (i = 0; i < ASIZE; ++i)
127
// bc[i] = m;
128
// for (i = 0; i < m - 1; ) {
129
// c = x[i];
130
// ++i;
131
// // c < 256 for Latin1 string, so, no need for branch
132
// #ifdef PATTERN_STRING_IS_LATIN1
133
// bc[c] = m - i;
134
// #else
135
// if (c < ASIZE) bc[c] = m - i;
136
// #endif
137
// }
138
//
139
// /* Searching */
140
// j = 0;
141
// while (j <= n - m) {
142
// c = y[i+j];
143
// if (x[m-1] == c)
144
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
145
// if (i < 0) return j;
146
// // c < 256 for Latin1 string, so, no need for branch
147
// #ifdef SOURCE_STRING_IS_LATIN1
148
// // LL case: (c< 256) always true. Remove branch
149
// j += bc[y[j+m-1]];
150
// #endif
151
// #ifndef PATTERN_STRING_IS_UTF
152
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
153
// if (c < ASIZE)
154
// j += bc[y[j+m-1]];
155
// else
156
// j += 1
157
// #endif
158
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
159
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
160
// if (c < ASIZE)
161
// j += bc[y[j+m-1]];
162
// else
163
// j += m
164
// #endif
165
// }
166
// }
167
168
if (icnt1 == -1) {
169
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
170
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
171
Register cnt1end = tmp2;
172
Register str2end = cnt2;
173
Register skipch = tmp2;
174
175
// str1 length is >=8, so, we can read at least 1 register for cases when
176
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
177
// UL case. We'll re-read last character in inner pre-loop code to have
178
// single outer pre-loop load
179
const int firstStep = isL ? 7 : 3;
180
181
const int ASIZE = 256;
182
const int STORED_BYTES = 32; // amount of bytes stored per instruction
183
sub(sp, sp, ASIZE);
184
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
185
mov(ch1, sp);
186
BIND(BM_INIT_LOOP);
187
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
188
subs(tmp5, tmp5, 1);
189
br(GT, BM_INIT_LOOP);
190
191
sub(cnt1tmp, cnt1, 1);
192
mov(tmp5, str2);
193
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
194
sub(ch2, cnt1, 1);
195
mov(tmp3, str1);
196
BIND(BCLOOP);
197
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
198
if (!str1_isL) {
199
subs(zr, ch1, ASIZE);
200
br(HS, BCSKIP);
201
}
202
strb(ch2, Address(sp, ch1));
203
BIND(BCSKIP);
204
subs(ch2, ch2, 1);
205
br(GT, BCLOOP);
206
207
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
208
if (str1_isL == str2_isL) {
209
// load last 8 bytes (8LL/4UU symbols)
210
ldr(tmp6, Address(tmp6, -wordSize));
211
} else {
212
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
213
// convert Latin1 to UTF. We'll have to wait until load completed, but
214
// it's still faster than per-character loads+checks
215
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
216
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
217
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
218
andr(tmp6, tmp6, 0xFF); // str1[N-4]
219
orr(ch2, ch1, ch2, LSL, 16);
220
orr(tmp6, tmp6, tmp3, LSL, 48);
221
orr(tmp6, tmp6, ch2, LSL, 16);
222
}
223
BIND(BMLOOPSTR2);
224
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
225
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
226
if (str1_isL == str2_isL) {
227
// re-init tmp3. It's for free because it's executed in parallel with
228
// load above. Alternative is to initialize it before loop, but it'll
229
// affect performance on in-order systems with 2 or more ld/st pipelines
230
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
231
}
232
if (!isL) { // UU/UL case
233
lsl(ch2, cnt1tmp, 1); // offset in bytes
234
}
235
cmp(tmp3, skipch);
236
br(NE, BMSKIP);
237
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
238
mov(ch1, tmp6);
239
if (isL) {
240
b(BMLOOPSTR1_AFTER_LOAD);
241
} else {
242
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
243
b(BMLOOPSTR1_CMP);
244
}
245
BIND(BMLOOPSTR1);
246
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
247
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
248
BIND(BMLOOPSTR1_AFTER_LOAD);
249
subs(cnt1tmp, cnt1tmp, 1);
250
br(LT, BMLOOPSTR1_LASTCMP);
251
BIND(BMLOOPSTR1_CMP);
252
cmp(ch1, ch2);
253
br(EQ, BMLOOPSTR1);
254
BIND(BMSKIP);
255
if (!isL) {
256
// if we've met UTF symbol while searching Latin1 pattern, then we can
257
// skip cnt1 symbols
258
if (str1_isL != str2_isL) {
259
mov(result_tmp, cnt1);
260
} else {
261
mov(result_tmp, 1);
262
}
263
subs(zr, skipch, ASIZE);
264
br(HS, BMADV);
265
}
266
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
267
BIND(BMADV);
268
sub(cnt1tmp, cnt1, 1);
269
add(str2, str2, result_tmp, LSL, str2_chr_shift);
270
cmp(str2, str2end);
271
br(LE, BMLOOPSTR2);
272
add(sp, sp, ASIZE);
273
b(NOMATCH);
274
BIND(BMLOOPSTR1_LASTCMP);
275
cmp(ch1, ch2);
276
br(NE, BMSKIP);
277
BIND(BMMATCH);
278
sub(result, str2, tmp5);
279
if (!str2_isL) lsr(result, result, 1);
280
add(sp, sp, ASIZE);
281
b(DONE);
282
283
BIND(LINEARSTUB);
284
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
285
br(LT, LINEAR_MEDIUM);
286
mov(result, zr);
287
RuntimeAddress stub = NULL;
288
if (isL) {
289
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
290
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
291
} else if (str1_isL) {
292
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
293
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
294
} else {
295
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
296
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
297
}
298
trampoline_call(stub);
299
b(DONE);
300
}
301
302
BIND(LINEARSEARCH);
303
{
304
Label DO1, DO2, DO3;
305
306
Register str2tmp = tmp2;
307
Register first = tmp3;
308
309
if (icnt1 == -1)
310
{
311
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
312
313
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
314
br(LT, DOSHORT);
315
BIND(LINEAR_MEDIUM);
316
(this->*str1_load_1chr)(first, Address(str1));
317
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
318
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
319
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
320
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
321
322
BIND(FIRST_LOOP);
323
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
324
cmp(first, ch2);
325
br(EQ, STR1_LOOP);
326
BIND(STR2_NEXT);
327
adds(cnt2_neg, cnt2_neg, str2_chr_size);
328
br(LE, FIRST_LOOP);
329
b(NOMATCH);
330
331
BIND(STR1_LOOP);
332
adds(cnt1tmp, cnt1_neg, str1_chr_size);
333
add(cnt2tmp, cnt2_neg, str2_chr_size);
334
br(GE, MATCH);
335
336
BIND(STR1_NEXT);
337
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
338
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
339
cmp(ch1, ch2);
340
br(NE, STR2_NEXT);
341
adds(cnt1tmp, cnt1tmp, str1_chr_size);
342
add(cnt2tmp, cnt2tmp, str2_chr_size);
343
br(LT, STR1_NEXT);
344
b(MATCH);
345
346
BIND(DOSHORT);
347
if (str1_isL == str2_isL) {
348
cmp(cnt1, (u1)2);
349
br(LT, DO1);
350
br(GT, DO3);
351
}
352
}
353
354
if (icnt1 == 4) {
355
Label CH1_LOOP;
356
357
(this->*load_4chr)(ch1, str1);
358
sub(result_tmp, cnt2, 4);
359
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
360
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
361
362
BIND(CH1_LOOP);
363
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
364
cmp(ch1, ch2);
365
br(EQ, MATCH);
366
adds(cnt2_neg, cnt2_neg, str2_chr_size);
367
br(LE, CH1_LOOP);
368
b(NOMATCH);
369
}
370
371
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
372
Label CH1_LOOP;
373
374
BIND(DO2);
375
(this->*load_2chr)(ch1, str1);
376
if (icnt1 == 2) {
377
sub(result_tmp, cnt2, 2);
378
}
379
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
380
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
381
BIND(CH1_LOOP);
382
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
383
cmp(ch1, ch2);
384
br(EQ, MATCH);
385
adds(cnt2_neg, cnt2_neg, str2_chr_size);
386
br(LE, CH1_LOOP);
387
b(NOMATCH);
388
}
389
390
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
391
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
392
393
BIND(DO3);
394
(this->*load_2chr)(first, str1);
395
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
396
if (icnt1 == 3) {
397
sub(result_tmp, cnt2, 3);
398
}
399
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
400
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
401
BIND(FIRST_LOOP);
402
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
403
cmpw(first, ch2);
404
br(EQ, STR1_LOOP);
405
BIND(STR2_NEXT);
406
adds(cnt2_neg, cnt2_neg, str2_chr_size);
407
br(LE, FIRST_LOOP);
408
b(NOMATCH);
409
410
BIND(STR1_LOOP);
411
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
412
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
413
cmp(ch1, ch2);
414
br(NE, STR2_NEXT);
415
b(MATCH);
416
}
417
418
if (icnt1 == -1 || icnt1 == 1) {
419
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
420
421
BIND(DO1);
422
(this->*str1_load_1chr)(ch1, str1);
423
cmp(cnt2, (u1)8);
424
br(LT, DO1_SHORT);
425
426
sub(result_tmp, cnt2, 8/str2_chr_size);
427
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
428
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
429
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
430
431
if (str2_isL) {
432
orr(ch1, ch1, ch1, LSL, 8);
433
}
434
orr(ch1, ch1, ch1, LSL, 16);
435
orr(ch1, ch1, ch1, LSL, 32);
436
BIND(CH1_LOOP);
437
ldr(ch2, Address(str2, cnt2_neg));
438
eor(ch2, ch1, ch2);
439
sub(tmp1, ch2, tmp3);
440
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
441
bics(tmp1, tmp1, tmp2);
442
br(NE, HAS_ZERO);
443
adds(cnt2_neg, cnt2_neg, 8);
444
br(LT, CH1_LOOP);
445
446
cmp(cnt2_neg, (u1)8);
447
mov(cnt2_neg, 0);
448
br(LT, CH1_LOOP);
449
b(NOMATCH);
450
451
BIND(HAS_ZERO);
452
rev(tmp1, tmp1);
453
clz(tmp1, tmp1);
454
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
455
b(MATCH);
456
457
BIND(DO1_SHORT);
458
mov(result_tmp, cnt2);
459
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
460
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
461
BIND(DO1_LOOP);
462
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
463
cmpw(ch1, ch2);
464
br(EQ, MATCH);
465
adds(cnt2_neg, cnt2_neg, str2_chr_size);
466
br(LT, DO1_LOOP);
467
}
468
}
469
BIND(NOMATCH);
470
mov(result, -1);
471
b(DONE);
472
BIND(MATCH);
473
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
474
BIND(DONE);
475
}
476
477
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
478
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
479
480
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
481
Register ch, Register result,
482
Register tmp1, Register tmp2, Register tmp3)
483
{
484
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
485
Register cnt1_neg = cnt1;
486
Register ch1 = rscratch1;
487
Register result_tmp = rscratch2;
488
489
cbz(cnt1, NOMATCH);
490
491
cmp(cnt1, (u1)4);
492
br(LT, DO1_SHORT);
493
494
orr(ch, ch, ch, LSL, 16);
495
orr(ch, ch, ch, LSL, 32);
496
497
sub(cnt1, cnt1, 4);
498
mov(result_tmp, cnt1);
499
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
500
sub(cnt1_neg, zr, cnt1, LSL, 1);
501
502
mov(tmp3, 0x0001000100010001);
503
504
BIND(CH1_LOOP);
505
ldr(ch1, Address(str1, cnt1_neg));
506
eor(ch1, ch, ch1);
507
sub(tmp1, ch1, tmp3);
508
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
509
bics(tmp1, tmp1, tmp2);
510
br(NE, HAS_ZERO);
511
adds(cnt1_neg, cnt1_neg, 8);
512
br(LT, CH1_LOOP);
513
514
cmp(cnt1_neg, (u1)8);
515
mov(cnt1_neg, 0);
516
br(LT, CH1_LOOP);
517
b(NOMATCH);
518
519
BIND(HAS_ZERO);
520
rev(tmp1, tmp1);
521
clz(tmp1, tmp1);
522
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
523
b(MATCH);
524
525
BIND(DO1_SHORT);
526
mov(result_tmp, cnt1);
527
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
528
sub(cnt1_neg, zr, cnt1, LSL, 1);
529
BIND(DO1_LOOP);
530
ldrh(ch1, Address(str1, cnt1_neg));
531
cmpw(ch, ch1);
532
br(EQ, MATCH);
533
adds(cnt1_neg, cnt1_neg, 2);
534
br(LT, DO1_LOOP);
535
BIND(NOMATCH);
536
mov(result, -1);
537
b(DONE);
538
BIND(MATCH);
539
add(result, result_tmp, cnt1_neg, ASR, 1);
540
BIND(DONE);
541
}
542
543
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
544
Register ch, Register result,
545
Register tmp1, Register tmp2, Register tmp3)
546
{
547
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
548
Register cnt1_neg = cnt1;
549
Register ch1 = rscratch1;
550
Register result_tmp = rscratch2;
551
552
cbz(cnt1, NOMATCH);
553
554
cmp(cnt1, (u1)8);
555
br(LT, DO1_SHORT);
556
557
orr(ch, ch, ch, LSL, 8);
558
orr(ch, ch, ch, LSL, 16);
559
orr(ch, ch, ch, LSL, 32);
560
561
sub(cnt1, cnt1, 8);
562
mov(result_tmp, cnt1);
563
lea(str1, Address(str1, cnt1));
564
sub(cnt1_neg, zr, cnt1);
565
566
mov(tmp3, 0x0101010101010101);
567
568
BIND(CH1_LOOP);
569
ldr(ch1, Address(str1, cnt1_neg));
570
eor(ch1, ch, ch1);
571
sub(tmp1, ch1, tmp3);
572
orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
573
bics(tmp1, tmp1, tmp2);
574
br(NE, HAS_ZERO);
575
adds(cnt1_neg, cnt1_neg, 8);
576
br(LT, CH1_LOOP);
577
578
cmp(cnt1_neg, (u1)8);
579
mov(cnt1_neg, 0);
580
br(LT, CH1_LOOP);
581
b(NOMATCH);
582
583
BIND(HAS_ZERO);
584
rev(tmp1, tmp1);
585
clz(tmp1, tmp1);
586
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
587
b(MATCH);
588
589
BIND(DO1_SHORT);
590
mov(result_tmp, cnt1);
591
lea(str1, Address(str1, cnt1));
592
sub(cnt1_neg, zr, cnt1);
593
BIND(DO1_LOOP);
594
ldrb(ch1, Address(str1, cnt1_neg));
595
cmp(ch, ch1);
596
br(EQ, MATCH);
597
adds(cnt1_neg, cnt1_neg, 1);
598
br(LT, DO1_LOOP);
599
BIND(NOMATCH);
600
mov(result, -1);
601
b(DONE);
602
BIND(MATCH);
603
add(result, result_tmp, cnt1_neg);
604
BIND(DONE);
605
}
606
607
// Compare strings.
608
void C2_MacroAssembler::string_compare(Register str1, Register str2,
609
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
610
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
611
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
612
DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
613
SHORT_LOOP_START, TAIL_CHECK;
614
615
bool isLL = ae == StrIntrinsicNode::LL;
616
bool isLU = ae == StrIntrinsicNode::LU;
617
bool isUL = ae == StrIntrinsicNode::UL;
618
619
// The stub threshold for LL strings is: 72 (64 + 8) chars
620
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
621
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
622
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
623
624
bool str1_isL = isLL || isLU;
625
bool str2_isL = isLL || isUL;
626
627
int str1_chr_shift = str1_isL ? 0 : 1;
628
int str2_chr_shift = str2_isL ? 0 : 1;
629
int str1_chr_size = str1_isL ? 1 : 2;
630
int str2_chr_size = str2_isL ? 1 : 2;
631
int minCharsInWord = isLL ? wordSize : wordSize/2;
632
633
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
634
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
635
(chr_insn)&MacroAssembler::ldrh;
636
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
637
(chr_insn)&MacroAssembler::ldrh;
638
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
639
(uxt_insn)&MacroAssembler::uxthw;
640
641
BLOCK_COMMENT("string_compare {");
642
643
// Bizzarely, the counts are passed in bytes, regardless of whether they
644
// are L or U strings, however the result is always in characters.
645
if (!str1_isL) asrw(cnt1, cnt1, 1);
646
if (!str2_isL) asrw(cnt2, cnt2, 1);
647
648
// Compute the minimum of the string lengths and save the difference.
649
subsw(result, cnt1, cnt2);
650
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
651
652
// A very short string
653
cmpw(cnt2, minCharsInWord);
654
br(Assembler::LE, SHORT_STRING);
655
656
// Compare longwords
657
// load first parts of strings and finish initialization while loading
658
{
659
if (str1_isL == str2_isL) { // LL or UU
660
ldr(tmp1, Address(str1));
661
cmp(str1, str2);
662
br(Assembler::EQ, DONE);
663
ldr(tmp2, Address(str2));
664
cmp(cnt2, stub_threshold);
665
br(GE, STUB);
666
subsw(cnt2, cnt2, minCharsInWord);
667
br(EQ, TAIL_CHECK);
668
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
669
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
670
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
671
} else if (isLU) {
672
ldrs(vtmp, Address(str1));
673
ldr(tmp2, Address(str2));
674
cmp(cnt2, stub_threshold);
675
br(GE, STUB);
676
subw(cnt2, cnt2, 4);
677
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
678
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
679
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
680
zip1(vtmp, T8B, vtmp, vtmpZ);
681
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
682
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
683
add(cnt1, cnt1, 4);
684
fmovd(tmp1, vtmp);
685
} else { // UL case
686
ldr(tmp1, Address(str1));
687
ldrs(vtmp, Address(str2));
688
cmp(cnt2, stub_threshold);
689
br(GE, STUB);
690
subw(cnt2, cnt2, 4);
691
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
692
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
693
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
694
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
695
zip1(vtmp, T8B, vtmp, vtmpZ);
696
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
697
add(cnt1, cnt1, 8);
698
fmovd(tmp2, vtmp);
699
}
700
adds(cnt2, cnt2, isUL ? 4 : 8);
701
br(GE, TAIL);
702
eor(rscratch2, tmp1, tmp2);
703
cbnz(rscratch2, DIFF);
704
// main loop
705
bind(NEXT_WORD);
706
if (str1_isL == str2_isL) {
707
ldr(tmp1, Address(str1, cnt2));
708
ldr(tmp2, Address(str2, cnt2));
709
adds(cnt2, cnt2, 8);
710
} else if (isLU) {
711
ldrs(vtmp, Address(str1, cnt1));
712
ldr(tmp2, Address(str2, cnt2));
713
add(cnt1, cnt1, 4);
714
zip1(vtmp, T8B, vtmp, vtmpZ);
715
fmovd(tmp1, vtmp);
716
adds(cnt2, cnt2, 8);
717
} else { // UL
718
ldrs(vtmp, Address(str2, cnt2));
719
ldr(tmp1, Address(str1, cnt1));
720
zip1(vtmp, T8B, vtmp, vtmpZ);
721
add(cnt1, cnt1, 8);
722
fmovd(tmp2, vtmp);
723
adds(cnt2, cnt2, 4);
724
}
725
br(GE, TAIL);
726
727
eor(rscratch2, tmp1, tmp2);
728
cbz(rscratch2, NEXT_WORD);
729
b(DIFF);
730
bind(TAIL);
731
eor(rscratch2, tmp1, tmp2);
732
cbnz(rscratch2, DIFF);
733
// Last longword. In the case where length == 4 we compare the
734
// same longword twice, but that's still faster than another
735
// conditional branch.
736
if (str1_isL == str2_isL) {
737
ldr(tmp1, Address(str1));
738
ldr(tmp2, Address(str2));
739
} else if (isLU) {
740
ldrs(vtmp, Address(str1));
741
ldr(tmp2, Address(str2));
742
zip1(vtmp, T8B, vtmp, vtmpZ);
743
fmovd(tmp1, vtmp);
744
} else { // UL
745
ldrs(vtmp, Address(str2));
746
ldr(tmp1, Address(str1));
747
zip1(vtmp, T8B, vtmp, vtmpZ);
748
fmovd(tmp2, vtmp);
749
}
750
bind(TAIL_CHECK);
751
eor(rscratch2, tmp1, tmp2);
752
cbz(rscratch2, DONE);
753
754
// Find the first different characters in the longwords and
755
// compute their difference.
756
bind(DIFF);
757
rev(rscratch2, rscratch2);
758
clz(rscratch2, rscratch2);
759
andr(rscratch2, rscratch2, isLL ? -8 : -16);
760
lsrv(tmp1, tmp1, rscratch2);
761
(this->*ext_chr)(tmp1, tmp1);
762
lsrv(tmp2, tmp2, rscratch2);
763
(this->*ext_chr)(tmp2, tmp2);
764
subw(result, tmp1, tmp2);
765
b(DONE);
766
}
767
768
bind(STUB);
769
RuntimeAddress stub = NULL;
770
switch(ae) {
771
case StrIntrinsicNode::LL:
772
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
773
break;
774
case StrIntrinsicNode::UU:
775
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
776
break;
777
case StrIntrinsicNode::LU:
778
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
779
break;
780
case StrIntrinsicNode::UL:
781
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
782
break;
783
default:
784
ShouldNotReachHere();
785
}
786
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
787
trampoline_call(stub);
788
b(DONE);
789
790
bind(SHORT_STRING);
791
// Is the minimum length zero?
792
cbz(cnt2, DONE);
793
// arrange code to do most branches while loading and loading next characters
794
// while comparing previous
795
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
796
subs(cnt2, cnt2, 1);
797
br(EQ, SHORT_LAST_INIT);
798
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
799
b(SHORT_LOOP_START);
800
bind(SHORT_LOOP);
801
subs(cnt2, cnt2, 1);
802
br(EQ, SHORT_LAST);
803
bind(SHORT_LOOP_START);
804
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
805
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
806
cmp(tmp1, cnt1);
807
br(NE, SHORT_LOOP_TAIL);
808
subs(cnt2, cnt2, 1);
809
br(EQ, SHORT_LAST2);
810
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
811
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
812
cmp(tmp2, rscratch1);
813
br(EQ, SHORT_LOOP);
814
sub(result, tmp2, rscratch1);
815
b(DONE);
816
bind(SHORT_LOOP_TAIL);
817
sub(result, tmp1, cnt1);
818
b(DONE);
819
bind(SHORT_LAST2);
820
cmp(tmp2, rscratch1);
821
br(EQ, DONE);
822
sub(result, tmp2, rscratch1);
823
824
b(DONE);
825
bind(SHORT_LAST_INIT);
826
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
827
bind(SHORT_LAST);
828
cmp(tmp1, cnt1);
829
br(EQ, DONE);
830
sub(result, tmp1, cnt1);
831
832
bind(DONE);
833
834
BLOCK_COMMENT("} string_compare");
835
}
836
837
void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1,
838
FloatRegister src2, int cond, bool isQ) {
839
SIMD_Arrangement size = esize2arrangement(type2aelembytes(bt), isQ);
840
if (bt == T_FLOAT || bt == T_DOUBLE) {
841
switch (cond) {
842
case BoolTest::eq: fcmeq(dst, size, src1, src2); break;
843
case BoolTest::ne: {
844
fcmeq(dst, size, src1, src2);
845
notr(dst, T16B, dst);
846
break;
847
}
848
case BoolTest::ge: fcmge(dst, size, src1, src2); break;
849
case BoolTest::gt: fcmgt(dst, size, src1, src2); break;
850
case BoolTest::le: fcmge(dst, size, src2, src1); break;
851
case BoolTest::lt: fcmgt(dst, size, src2, src1); break;
852
default:
853
assert(false, "unsupported");
854
ShouldNotReachHere();
855
}
856
} else {
857
switch (cond) {
858
case BoolTest::eq: cmeq(dst, size, src1, src2); break;
859
case BoolTest::ne: {
860
cmeq(dst, size, src1, src2);
861
notr(dst, T16B, dst);
862
break;
863
}
864
case BoolTest::ge: cmge(dst, size, src1, src2); break;
865
case BoolTest::gt: cmgt(dst, size, src1, src2); break;
866
case BoolTest::le: cmge(dst, size, src2, src1); break;
867
case BoolTest::lt: cmgt(dst, size, src2, src1); break;
868
case BoolTest::uge: cmhs(dst, size, src1, src2); break;
869
case BoolTest::ugt: cmhi(dst, size, src1, src2); break;
870
case BoolTest::ult: cmhi(dst, size, src2, src1); break;
871
case BoolTest::ule: cmhs(dst, size, src2, src1); break;
872
default:
873
assert(false, "unsupported");
874
ShouldNotReachHere();
875
}
876
}
877
}
878
879