Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
40930 views
1
/*
2
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*
23
*/
24
25
#include "precompiled.hpp"
26
#include "asm/assembler.hpp"
27
#include "asm/assembler.inline.hpp"
28
#include "opto/c2_MacroAssembler.hpp"
29
#include "opto/intrinsicnode.hpp"
30
#include "runtime/stubRoutines.hpp"
31
32
#ifdef PRODUCT
33
#define BLOCK_COMMENT(str) /* nothing */
34
#define STOP(error) stop(error)
35
#else
36
#define BLOCK_COMMENT(str) block_comment(str)
37
#define STOP(error) block_comment(error); stop(error)
38
#endif
39
40
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
41
42
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
43
44
// Search for str1 in str2 and return index or -1
45
void C2_MacroAssembler::string_indexof(Register str2, Register str1,
46
Register cnt2, Register cnt1,
47
Register tmp1, Register tmp2,
48
Register tmp3, Register tmp4,
49
Register tmp5, Register tmp6,
50
int icnt1, Register result, int ae) {
51
// NOTE: tmp5, tmp6 can be zr depending on specific method version
52
Label LINEARSEARCH, LINEARSTUB, LINEAR_MEDIUM, DONE, NOMATCH, MATCH;
53
54
Register ch1 = rscratch1;
55
Register ch2 = rscratch2;
56
Register cnt1tmp = tmp1;
57
Register cnt2tmp = tmp2;
58
Register cnt1_neg = cnt1;
59
Register cnt2_neg = cnt2;
60
Register result_tmp = tmp4;
61
62
bool isL = ae == StrIntrinsicNode::LL;
63
64
bool str1_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
65
bool str2_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
66
int str1_chr_shift = str1_isL ? 0:1;
67
int str2_chr_shift = str2_isL ? 0:1;
68
int str1_chr_size = str1_isL ? 1:2;
69
int str2_chr_size = str2_isL ? 1:2;
70
chr_insn str1_load_1chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
71
(chr_insn)&MacroAssembler::ldrh;
72
chr_insn str2_load_1chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
73
(chr_insn)&MacroAssembler::ldrh;
74
chr_insn load_2chr = isL ? (chr_insn)&MacroAssembler::ldrh : (chr_insn)&MacroAssembler::ldrw;
75
chr_insn load_4chr = isL ? (chr_insn)&MacroAssembler::ldrw : (chr_insn)&MacroAssembler::ldr;
76
77
// Note, inline_string_indexOf() generates checks:
78
// if (substr.count > string.count) return -1;
79
// if (substr.count == 0) return 0;
80
81
// We have two strings, a source string in str2, cnt2 and a pattern string
82
// in str1, cnt1. Find the 1st occurence of pattern in source or return -1.
83
84
// For larger pattern and source we use a simplified Boyer Moore algorithm.
85
// With a small pattern and source we use linear scan.
86
87
if (icnt1 == -1) {
88
sub(result_tmp, cnt2, cnt1);
89
cmp(cnt1, (u1)8); // Use Linear Scan if cnt1 < 8 || cnt1 >= 256
90
br(LT, LINEARSEARCH);
91
dup(v0, T16B, cnt1); // done in separate FPU pipeline. Almost no penalty
92
subs(zr, cnt1, 256);
93
lsr(tmp1, cnt2, 2);
94
ccmp(cnt1, tmp1, 0b0000, LT); // Source must be 4 * pattern for BM
95
br(GE, LINEARSTUB);
96
}
97
98
// The Boyer Moore alogorithm is based on the description here:-
99
//
100
// http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
101
//
102
// This describes and algorithm with 2 shift rules. The 'Bad Character' rule
103
// and the 'Good Suffix' rule.
104
//
105
// These rules are essentially heuristics for how far we can shift the
106
// pattern along the search string.
107
//
108
// The implementation here uses the 'Bad Character' rule only because of the
109
// complexity of initialisation for the 'Good Suffix' rule.
110
//
111
// This is also known as the Boyer-Moore-Horspool algorithm:-
112
//
113
// http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
114
//
115
// This particular implementation has few java-specific optimizations.
116
//
117
// #define ASIZE 256
118
//
119
// int bm(unsigned char *x, int m, unsigned char *y, int n) {
120
// int i, j;
121
// unsigned c;
122
// unsigned char bc[ASIZE];
123
//
124
// /* Preprocessing */
125
// for (i = 0; i < ASIZE; ++i)
126
// bc[i] = m;
127
// for (i = 0; i < m - 1; ) {
128
// c = x[i];
129
// ++i;
130
// // c < 256 for Latin1 string, so, no need for branch
131
// #ifdef PATTERN_STRING_IS_LATIN1
132
// bc[c] = m - i;
133
// #else
134
// if (c < ASIZE) bc[c] = m - i;
135
// #endif
136
// }
137
//
138
// /* Searching */
139
// j = 0;
140
// while (j <= n - m) {
141
// c = y[i+j];
142
// if (x[m-1] == c)
143
// for (i = m - 2; i >= 0 && x[i] == y[i + j]; --i);
144
// if (i < 0) return j;
145
// // c < 256 for Latin1 string, so, no need for branch
146
// #ifdef SOURCE_STRING_IS_LATIN1
147
// // LL case: (c< 256) always true. Remove branch
148
// j += bc[y[j+m-1]];
149
// #endif
150
// #ifndef PATTERN_STRING_IS_UTF
151
// // UU case: need if (c<ASIZE) check. Skip 1 character if not.
152
// if (c < ASIZE)
153
// j += bc[y[j+m-1]];
154
// else
155
// j += 1
156
// #endif
157
// #ifdef PATTERN_IS_LATIN1_AND_SOURCE_IS_UTF
158
// // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
159
// if (c < ASIZE)
160
// j += bc[y[j+m-1]];
161
// else
162
// j += m
163
// #endif
164
// }
165
// }
166
167
if (icnt1 == -1) {
168
Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
169
BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
170
Register cnt1end = tmp2;
171
Register str2end = cnt2;
172
Register skipch = tmp2;
173
174
// str1 length is >=8, so, we can read at least 1 register for cases when
175
// UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
176
// UL case. We'll re-read last character in inner pre-loop code to have
177
// single outer pre-loop load
178
const int firstStep = isL ? 7 : 3;
179
180
const int ASIZE = 256;
181
const int STORED_BYTES = 32; // amount of bytes stored per instruction
182
sub(sp, sp, ASIZE);
183
mov(tmp5, ASIZE/STORED_BYTES); // loop iterations
184
mov(ch1, sp);
185
BIND(BM_INIT_LOOP);
186
stpq(v0, v0, Address(post(ch1, STORED_BYTES)));
187
subs(tmp5, tmp5, 1);
188
br(GT, BM_INIT_LOOP);
189
190
sub(cnt1tmp, cnt1, 1);
191
mov(tmp5, str2);
192
add(str2end, str2, result_tmp, LSL, str2_chr_shift);
193
sub(ch2, cnt1, 1);
194
mov(tmp3, str1);
195
BIND(BCLOOP);
196
(this->*str1_load_1chr)(ch1, Address(post(tmp3, str1_chr_size)));
197
if (!str1_isL) {
198
subs(zr, ch1, ASIZE);
199
br(HS, BCSKIP);
200
}
201
strb(ch2, Address(sp, ch1));
202
BIND(BCSKIP);
203
subs(ch2, ch2, 1);
204
br(GT, BCLOOP);
205
206
add(tmp6, str1, cnt1, LSL, str1_chr_shift); // address after str1
207
if (str1_isL == str2_isL) {
208
// load last 8 bytes (8LL/4UU symbols)
209
ldr(tmp6, Address(tmp6, -wordSize));
210
} else {
211
ldrw(tmp6, Address(tmp6, -wordSize/2)); // load last 4 bytes(4 symbols)
212
// convert Latin1 to UTF. We'll have to wait until load completed, but
213
// it's still faster than per-character loads+checks
214
lsr(tmp3, tmp6, BitsPerByte * (wordSize/2 - str1_chr_size)); // str1[N-1]
215
ubfx(ch1, tmp6, 8, 8); // str1[N-2]
216
ubfx(ch2, tmp6, 16, 8); // str1[N-3]
217
andr(tmp6, tmp6, 0xFF); // str1[N-4]
218
orr(ch2, ch1, ch2, LSL, 16);
219
orr(tmp6, tmp6, tmp3, LSL, 48);
220
orr(tmp6, tmp6, ch2, LSL, 16);
221
}
222
BIND(BMLOOPSTR2);
223
(this->*str2_load_1chr)(skipch, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
224
sub(cnt1tmp, cnt1tmp, firstStep); // cnt1tmp is positive here, because cnt1 >= 8
225
if (str1_isL == str2_isL) {
226
// re-init tmp3. It's for free because it's executed in parallel with
227
// load above. Alternative is to initialize it before loop, but it'll
228
// affect performance on in-order systems with 2 or more ld/st pipelines
229
lsr(tmp3, tmp6, BitsPerByte * (wordSize - str1_chr_size));
230
}
231
if (!isL) { // UU/UL case
232
lsl(ch2, cnt1tmp, 1); // offset in bytes
233
}
234
cmp(tmp3, skipch);
235
br(NE, BMSKIP);
236
ldr(ch2, Address(str2, isL ? cnt1tmp : ch2));
237
mov(ch1, tmp6);
238
if (isL) {
239
b(BMLOOPSTR1_AFTER_LOAD);
240
} else {
241
sub(cnt1tmp, cnt1tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
242
b(BMLOOPSTR1_CMP);
243
}
244
BIND(BMLOOPSTR1);
245
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp, Address::lsl(str1_chr_shift)));
246
(this->*str2_load_1chr)(ch2, Address(str2, cnt1tmp, Address::lsl(str2_chr_shift)));
247
BIND(BMLOOPSTR1_AFTER_LOAD);
248
subs(cnt1tmp, cnt1tmp, 1);
249
br(LT, BMLOOPSTR1_LASTCMP);
250
BIND(BMLOOPSTR1_CMP);
251
cmp(ch1, ch2);
252
br(EQ, BMLOOPSTR1);
253
BIND(BMSKIP);
254
if (!isL) {
255
// if we've met UTF symbol while searching Latin1 pattern, then we can
256
// skip cnt1 symbols
257
if (str1_isL != str2_isL) {
258
mov(result_tmp, cnt1);
259
} else {
260
mov(result_tmp, 1);
261
}
262
subs(zr, skipch, ASIZE);
263
br(HS, BMADV);
264
}
265
ldrb(result_tmp, Address(sp, skipch)); // load skip distance
266
BIND(BMADV);
267
sub(cnt1tmp, cnt1, 1);
268
add(str2, str2, result_tmp, LSL, str2_chr_shift);
269
cmp(str2, str2end);
270
br(LE, BMLOOPSTR2);
271
add(sp, sp, ASIZE);
272
b(NOMATCH);
273
BIND(BMLOOPSTR1_LASTCMP);
274
cmp(ch1, ch2);
275
br(NE, BMSKIP);
276
BIND(BMMATCH);
277
sub(result, str2, tmp5);
278
if (!str2_isL) lsr(result, result, 1);
279
add(sp, sp, ASIZE);
280
b(DONE);
281
282
BIND(LINEARSTUB);
283
cmp(cnt1, (u1)16); // small patterns still should be handled by simple algorithm
284
br(LT, LINEAR_MEDIUM);
285
mov(result, zr);
286
RuntimeAddress stub = NULL;
287
if (isL) {
288
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ll());
289
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
290
} else if (str1_isL) {
291
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_ul());
292
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
293
} else {
294
stub = RuntimeAddress(StubRoutines::aarch64::string_indexof_linear_uu());
295
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
296
}
297
trampoline_call(stub);
298
b(DONE);
299
}
300
301
BIND(LINEARSEARCH);
302
{
303
Label DO1, DO2, DO3;
304
305
Register str2tmp = tmp2;
306
Register first = tmp3;
307
308
if (icnt1 == -1)
309
{
310
Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
311
312
cmp(cnt1, u1(str1_isL == str2_isL ? 4 : 2));
313
br(LT, DOSHORT);
314
BIND(LINEAR_MEDIUM);
315
(this->*str1_load_1chr)(first, Address(str1));
316
lea(str1, Address(str1, cnt1, Address::lsl(str1_chr_shift)));
317
sub(cnt1_neg, zr, cnt1, LSL, str1_chr_shift);
318
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
319
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
320
321
BIND(FIRST_LOOP);
322
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
323
cmp(first, ch2);
324
br(EQ, STR1_LOOP);
325
BIND(STR2_NEXT);
326
adds(cnt2_neg, cnt2_neg, str2_chr_size);
327
br(LE, FIRST_LOOP);
328
b(NOMATCH);
329
330
BIND(STR1_LOOP);
331
adds(cnt1tmp, cnt1_neg, str1_chr_size);
332
add(cnt2tmp, cnt2_neg, str2_chr_size);
333
br(GE, MATCH);
334
335
BIND(STR1_NEXT);
336
(this->*str1_load_1chr)(ch1, Address(str1, cnt1tmp));
337
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
338
cmp(ch1, ch2);
339
br(NE, STR2_NEXT);
340
adds(cnt1tmp, cnt1tmp, str1_chr_size);
341
add(cnt2tmp, cnt2tmp, str2_chr_size);
342
br(LT, STR1_NEXT);
343
b(MATCH);
344
345
BIND(DOSHORT);
346
if (str1_isL == str2_isL) {
347
cmp(cnt1, (u1)2);
348
br(LT, DO1);
349
br(GT, DO3);
350
}
351
}
352
353
if (icnt1 == 4) {
354
Label CH1_LOOP;
355
356
(this->*load_4chr)(ch1, str1);
357
sub(result_tmp, cnt2, 4);
358
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
359
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
360
361
BIND(CH1_LOOP);
362
(this->*load_4chr)(ch2, Address(str2, cnt2_neg));
363
cmp(ch1, ch2);
364
br(EQ, MATCH);
365
adds(cnt2_neg, cnt2_neg, str2_chr_size);
366
br(LE, CH1_LOOP);
367
b(NOMATCH);
368
}
369
370
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 2) {
371
Label CH1_LOOP;
372
373
BIND(DO2);
374
(this->*load_2chr)(ch1, str1);
375
if (icnt1 == 2) {
376
sub(result_tmp, cnt2, 2);
377
}
378
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
379
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
380
BIND(CH1_LOOP);
381
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
382
cmp(ch1, ch2);
383
br(EQ, MATCH);
384
adds(cnt2_neg, cnt2_neg, str2_chr_size);
385
br(LE, CH1_LOOP);
386
b(NOMATCH);
387
}
388
389
if ((icnt1 == -1 && str1_isL == str2_isL) || icnt1 == 3) {
390
Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
391
392
BIND(DO3);
393
(this->*load_2chr)(first, str1);
394
(this->*str1_load_1chr)(ch1, Address(str1, 2*str1_chr_size));
395
if (icnt1 == 3) {
396
sub(result_tmp, cnt2, 3);
397
}
398
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
399
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
400
BIND(FIRST_LOOP);
401
(this->*load_2chr)(ch2, Address(str2, cnt2_neg));
402
cmpw(first, ch2);
403
br(EQ, STR1_LOOP);
404
BIND(STR2_NEXT);
405
adds(cnt2_neg, cnt2_neg, str2_chr_size);
406
br(LE, FIRST_LOOP);
407
b(NOMATCH);
408
409
BIND(STR1_LOOP);
410
add(cnt2tmp, cnt2_neg, 2*str2_chr_size);
411
(this->*str2_load_1chr)(ch2, Address(str2, cnt2tmp));
412
cmp(ch1, ch2);
413
br(NE, STR2_NEXT);
414
b(MATCH);
415
}
416
417
if (icnt1 == -1 || icnt1 == 1) {
418
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP;
419
420
BIND(DO1);
421
(this->*str1_load_1chr)(ch1, str1);
422
cmp(cnt2, (u1)8);
423
br(LT, DO1_SHORT);
424
425
sub(result_tmp, cnt2, 8/str2_chr_size);
426
sub(cnt2_neg, zr, result_tmp, LSL, str2_chr_shift);
427
mov(tmp3, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
428
lea(str2, Address(str2, result_tmp, Address::lsl(str2_chr_shift)));
429
430
if (str2_isL) {
431
orr(ch1, ch1, ch1, LSL, 8);
432
}
433
orr(ch1, ch1, ch1, LSL, 16);
434
orr(ch1, ch1, ch1, LSL, 32);
435
BIND(CH1_LOOP);
436
ldr(ch2, Address(str2, cnt2_neg));
437
eor(ch2, ch1, ch2);
438
sub(tmp1, ch2, tmp3);
439
orr(tmp2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
440
bics(tmp1, tmp1, tmp2);
441
br(NE, HAS_ZERO);
442
adds(cnt2_neg, cnt2_neg, 8);
443
br(LT, CH1_LOOP);
444
445
cmp(cnt2_neg, (u1)8);
446
mov(cnt2_neg, 0);
447
br(LT, CH1_LOOP);
448
b(NOMATCH);
449
450
BIND(HAS_ZERO);
451
rev(tmp1, tmp1);
452
clz(tmp1, tmp1);
453
add(cnt2_neg, cnt2_neg, tmp1, LSR, 3);
454
b(MATCH);
455
456
BIND(DO1_SHORT);
457
mov(result_tmp, cnt2);
458
lea(str2, Address(str2, cnt2, Address::lsl(str2_chr_shift)));
459
sub(cnt2_neg, zr, cnt2, LSL, str2_chr_shift);
460
BIND(DO1_LOOP);
461
(this->*str2_load_1chr)(ch2, Address(str2, cnt2_neg));
462
cmpw(ch1, ch2);
463
br(EQ, MATCH);
464
adds(cnt2_neg, cnt2_neg, str2_chr_size);
465
br(LT, DO1_LOOP);
466
}
467
}
468
BIND(NOMATCH);
469
mov(result, -1);
470
b(DONE);
471
BIND(MATCH);
472
add(result, result_tmp, cnt2_neg, ASR, str2_chr_shift);
473
BIND(DONE);
474
}
475
476
typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
477
typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn);
478
479
void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
480
Register ch, Register result,
481
Register tmp1, Register tmp2, Register tmp3)
482
{
483
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
484
Register cnt1_neg = cnt1;
485
Register ch1 = rscratch1;
486
Register result_tmp = rscratch2;
487
488
cbz(cnt1, NOMATCH);
489
490
cmp(cnt1, (u1)4);
491
br(LT, DO1_SHORT);
492
493
orr(ch, ch, ch, LSL, 16);
494
orr(ch, ch, ch, LSL, 32);
495
496
sub(cnt1, cnt1, 4);
497
mov(result_tmp, cnt1);
498
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
499
sub(cnt1_neg, zr, cnt1, LSL, 1);
500
501
mov(tmp3, 0x0001000100010001);
502
503
BIND(CH1_LOOP);
504
ldr(ch1, Address(str1, cnt1_neg));
505
eor(ch1, ch, ch1);
506
sub(tmp1, ch1, tmp3);
507
orr(tmp2, ch1, 0x7fff7fff7fff7fff);
508
bics(tmp1, tmp1, tmp2);
509
br(NE, HAS_ZERO);
510
adds(cnt1_neg, cnt1_neg, 8);
511
br(LT, CH1_LOOP);
512
513
cmp(cnt1_neg, (u1)8);
514
mov(cnt1_neg, 0);
515
br(LT, CH1_LOOP);
516
b(NOMATCH);
517
518
BIND(HAS_ZERO);
519
rev(tmp1, tmp1);
520
clz(tmp1, tmp1);
521
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
522
b(MATCH);
523
524
BIND(DO1_SHORT);
525
mov(result_tmp, cnt1);
526
lea(str1, Address(str1, cnt1, Address::uxtw(1)));
527
sub(cnt1_neg, zr, cnt1, LSL, 1);
528
BIND(DO1_LOOP);
529
ldrh(ch1, Address(str1, cnt1_neg));
530
cmpw(ch, ch1);
531
br(EQ, MATCH);
532
adds(cnt1_neg, cnt1_neg, 2);
533
br(LT, DO1_LOOP);
534
BIND(NOMATCH);
535
mov(result, -1);
536
b(DONE);
537
BIND(MATCH);
538
add(result, result_tmp, cnt1_neg, ASR, 1);
539
BIND(DONE);
540
}
541
542
void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
543
Register ch, Register result,
544
Register tmp1, Register tmp2, Register tmp3)
545
{
546
Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
547
Register cnt1_neg = cnt1;
548
Register ch1 = rscratch1;
549
Register result_tmp = rscratch2;
550
551
cbz(cnt1, NOMATCH);
552
553
cmp(cnt1, (u1)8);
554
br(LT, DO1_SHORT);
555
556
orr(ch, ch, ch, LSL, 8);
557
orr(ch, ch, ch, LSL, 16);
558
orr(ch, ch, ch, LSL, 32);
559
560
sub(cnt1, cnt1, 8);
561
mov(result_tmp, cnt1);
562
lea(str1, Address(str1, cnt1));
563
sub(cnt1_neg, zr, cnt1);
564
565
mov(tmp3, 0x0101010101010101);
566
567
BIND(CH1_LOOP);
568
ldr(ch1, Address(str1, cnt1_neg));
569
eor(ch1, ch, ch1);
570
sub(tmp1, ch1, tmp3);
571
orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
572
bics(tmp1, tmp1, tmp2);
573
br(NE, HAS_ZERO);
574
adds(cnt1_neg, cnt1_neg, 8);
575
br(LT, CH1_LOOP);
576
577
cmp(cnt1_neg, (u1)8);
578
mov(cnt1_neg, 0);
579
br(LT, CH1_LOOP);
580
b(NOMATCH);
581
582
BIND(HAS_ZERO);
583
rev(tmp1, tmp1);
584
clz(tmp1, tmp1);
585
add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
586
b(MATCH);
587
588
BIND(DO1_SHORT);
589
mov(result_tmp, cnt1);
590
lea(str1, Address(str1, cnt1));
591
sub(cnt1_neg, zr, cnt1);
592
BIND(DO1_LOOP);
593
ldrb(ch1, Address(str1, cnt1_neg));
594
cmp(ch, ch1);
595
br(EQ, MATCH);
596
adds(cnt1_neg, cnt1_neg, 1);
597
br(LT, DO1_LOOP);
598
BIND(NOMATCH);
599
mov(result, -1);
600
b(DONE);
601
BIND(MATCH);
602
add(result, result_tmp, cnt1_neg);
603
BIND(DONE);
604
}
605
606
// Compare strings.
607
void C2_MacroAssembler::string_compare(Register str1, Register str2,
608
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
609
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
610
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
611
DIFF, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
612
SHORT_LOOP_START, TAIL_CHECK;
613
614
bool isLL = ae == StrIntrinsicNode::LL;
615
bool isLU = ae == StrIntrinsicNode::LU;
616
bool isUL = ae == StrIntrinsicNode::UL;
617
618
// The stub threshold for LL strings is: 72 (64 + 8) chars
619
// UU: 36 chars, or 72 bytes (valid for the 64-byte large loop with prefetch)
620
// LU/UL: 24 chars, or 48 bytes (valid for the 16-character loop at least)
621
const u1 stub_threshold = isLL ? 72 : ((isLU || isUL) ? 24 : 36);
622
623
bool str1_isL = isLL || isLU;
624
bool str2_isL = isLL || isUL;
625
626
int str1_chr_shift = str1_isL ? 0 : 1;
627
int str2_chr_shift = str2_isL ? 0 : 1;
628
int str1_chr_size = str1_isL ? 1 : 2;
629
int str2_chr_size = str2_isL ? 1 : 2;
630
int minCharsInWord = isLL ? wordSize : wordSize/2;
631
632
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
633
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
634
(chr_insn)&MacroAssembler::ldrh;
635
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
636
(chr_insn)&MacroAssembler::ldrh;
637
uxt_insn ext_chr = isLL ? (uxt_insn)&MacroAssembler::uxtbw :
638
(uxt_insn)&MacroAssembler::uxthw;
639
640
BLOCK_COMMENT("string_compare {");
641
642
// Bizzarely, the counts are passed in bytes, regardless of whether they
643
// are L or U strings, however the result is always in characters.
644
if (!str1_isL) asrw(cnt1, cnt1, 1);
645
if (!str2_isL) asrw(cnt2, cnt2, 1);
646
647
// Compute the minimum of the string lengths and save the difference.
648
subsw(result, cnt1, cnt2);
649
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
650
651
// A very short string
652
cmpw(cnt2, minCharsInWord);
653
br(Assembler::LE, SHORT_STRING);
654
655
// Compare longwords
656
// load first parts of strings and finish initialization while loading
657
{
658
if (str1_isL == str2_isL) { // LL or UU
659
ldr(tmp1, Address(str1));
660
cmp(str1, str2);
661
br(Assembler::EQ, DONE);
662
ldr(tmp2, Address(str2));
663
cmp(cnt2, stub_threshold);
664
br(GE, STUB);
665
subsw(cnt2, cnt2, minCharsInWord);
666
br(EQ, TAIL_CHECK);
667
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
668
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
669
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
670
} else if (isLU) {
671
ldrs(vtmp, Address(str1));
672
ldr(tmp2, Address(str2));
673
cmp(cnt2, stub_threshold);
674
br(GE, STUB);
675
subw(cnt2, cnt2, 4);
676
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
677
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
678
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
679
zip1(vtmp, T8B, vtmp, vtmpZ);
680
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
681
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
682
add(cnt1, cnt1, 4);
683
fmovd(tmp1, vtmp);
684
} else { // UL case
685
ldr(tmp1, Address(str1));
686
ldrs(vtmp, Address(str2));
687
cmp(cnt2, stub_threshold);
688
br(GE, STUB);
689
subw(cnt2, cnt2, 4);
690
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
691
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
692
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
693
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
694
zip1(vtmp, T8B, vtmp, vtmpZ);
695
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
696
add(cnt1, cnt1, 8);
697
fmovd(tmp2, vtmp);
698
}
699
adds(cnt2, cnt2, isUL ? 4 : 8);
700
br(GE, TAIL);
701
eor(rscratch2, tmp1, tmp2);
702
cbnz(rscratch2, DIFF);
703
// main loop
704
bind(NEXT_WORD);
705
if (str1_isL == str2_isL) {
706
ldr(tmp1, Address(str1, cnt2));
707
ldr(tmp2, Address(str2, cnt2));
708
adds(cnt2, cnt2, 8);
709
} else if (isLU) {
710
ldrs(vtmp, Address(str1, cnt1));
711
ldr(tmp2, Address(str2, cnt2));
712
add(cnt1, cnt1, 4);
713
zip1(vtmp, T8B, vtmp, vtmpZ);
714
fmovd(tmp1, vtmp);
715
adds(cnt2, cnt2, 8);
716
} else { // UL
717
ldrs(vtmp, Address(str2, cnt2));
718
ldr(tmp1, Address(str1, cnt1));
719
zip1(vtmp, T8B, vtmp, vtmpZ);
720
add(cnt1, cnt1, 8);
721
fmovd(tmp2, vtmp);
722
adds(cnt2, cnt2, 4);
723
}
724
br(GE, TAIL);
725
726
eor(rscratch2, tmp1, tmp2);
727
cbz(rscratch2, NEXT_WORD);
728
b(DIFF);
729
bind(TAIL);
730
eor(rscratch2, tmp1, tmp2);
731
cbnz(rscratch2, DIFF);
732
// Last longword. In the case where length == 4 we compare the
733
// same longword twice, but that's still faster than another
734
// conditional branch.
735
if (str1_isL == str2_isL) {
736
ldr(tmp1, Address(str1));
737
ldr(tmp2, Address(str2));
738
} else if (isLU) {
739
ldrs(vtmp, Address(str1));
740
ldr(tmp2, Address(str2));
741
zip1(vtmp, T8B, vtmp, vtmpZ);
742
fmovd(tmp1, vtmp);
743
} else { // UL
744
ldrs(vtmp, Address(str2));
745
ldr(tmp1, Address(str1));
746
zip1(vtmp, T8B, vtmp, vtmpZ);
747
fmovd(tmp2, vtmp);
748
}
749
bind(TAIL_CHECK);
750
eor(rscratch2, tmp1, tmp2);
751
cbz(rscratch2, DONE);
752
753
// Find the first different characters in the longwords and
754
// compute their difference.
755
bind(DIFF);
756
rev(rscratch2, rscratch2);
757
clz(rscratch2, rscratch2);
758
andr(rscratch2, rscratch2, isLL ? -8 : -16);
759
lsrv(tmp1, tmp1, rscratch2);
760
(this->*ext_chr)(tmp1, tmp1);
761
lsrv(tmp2, tmp2, rscratch2);
762
(this->*ext_chr)(tmp2, tmp2);
763
subw(result, tmp1, tmp2);
764
b(DONE);
765
}
766
767
bind(STUB);
768
RuntimeAddress stub = NULL;
769
switch(ae) {
770
case StrIntrinsicNode::LL:
771
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
772
break;
773
case StrIntrinsicNode::UU:
774
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
775
break;
776
case StrIntrinsicNode::LU:
777
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
778
break;
779
case StrIntrinsicNode::UL:
780
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
781
break;
782
default:
783
ShouldNotReachHere();
784
}
785
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
786
trampoline_call(stub);
787
b(DONE);
788
789
bind(SHORT_STRING);
790
// Is the minimum length zero?
791
cbz(cnt2, DONE);
792
// arrange code to do most branches while loading and loading next characters
793
// while comparing previous
794
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
795
subs(cnt2, cnt2, 1);
796
br(EQ, SHORT_LAST_INIT);
797
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
798
b(SHORT_LOOP_START);
799
bind(SHORT_LOOP);
800
subs(cnt2, cnt2, 1);
801
br(EQ, SHORT_LAST);
802
bind(SHORT_LOOP_START);
803
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
804
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
805
cmp(tmp1, cnt1);
806
br(NE, SHORT_LOOP_TAIL);
807
subs(cnt2, cnt2, 1);
808
br(EQ, SHORT_LAST2);
809
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
810
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
811
cmp(tmp2, rscratch1);
812
br(EQ, SHORT_LOOP);
813
sub(result, tmp2, rscratch1);
814
b(DONE);
815
bind(SHORT_LOOP_TAIL);
816
sub(result, tmp1, cnt1);
817
b(DONE);
818
bind(SHORT_LAST2);
819
cmp(tmp2, rscratch1);
820
br(EQ, DONE);
821
sub(result, tmp2, rscratch1);
822
823
b(DONE);
824
bind(SHORT_LAST_INIT);
825
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
826
bind(SHORT_LAST);
827
cmp(tmp1, cnt1);
828
br(EQ, DONE);
829
sub(result, tmp1, cnt1);
830
831
bind(DONE);
832
833
BLOCK_COMMENT("} string_compare");
834
}
835
836