Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/cpu/s390/c2_MacroAssembler_s390.cpp
40930 views
1
/*
2
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*
23
*/
24
25
#include "precompiled.hpp"
26
#include "asm/assembler.hpp"
27
#include "asm/assembler.inline.hpp"
28
#include "opto/c2_MacroAssembler.hpp"
29
#include "opto/intrinsicnode.hpp"
30
#include "runtime/stubRoutines.hpp"
31
32
#define BLOCK_COMMENT(str) block_comment(str)
33
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
34
35
//------------------------------------------------------
36
// Special String Intrinsics. Implementation
37
//------------------------------------------------------
38
39
// Intrinsics for CompactStrings
40
41
// Compress char[] to byte[].
42
// Restores: src, dst
43
// Uses: cnt
44
// Kills: tmp, Z_R0, Z_R1.
45
// Early clobber: result.
46
// Note:
47
// cnt is signed int. Do not rely on high word!
48
// counts # characters, not bytes.
49
// The result is the number of characters copied before the first incompatible character was found.
50
// If precise is true, the processing stops exactly at this point. Otherwise, the result may be off
51
// by a few bytes. The result always indicates the number of copied characters.
52
// When used as a character index, the returned value points to the first incompatible character.
53
//
54
// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure:
55
// - Different number of characters may have been written to dead array (if precise is false).
56
// - Returns a number <cnt instead of 0. (Result gets compared with cnt.)
57
unsigned int C2_MacroAssembler::string_compress(Register result, Register src, Register dst, Register cnt,
58
Register tmp, bool precise) {
59
assert_different_registers(Z_R0, Z_R1, result, src, dst, cnt, tmp);
60
61
if (precise) {
62
BLOCK_COMMENT("encode_iso_array {");
63
} else {
64
BLOCK_COMMENT("string_compress {");
65
}
66
int block_start = offset();
67
68
Register Rsrc = src;
69
Register Rdst = dst;
70
Register Rix = tmp;
71
Register Rcnt = cnt;
72
Register Rmask = result; // holds incompatibility check mask until result value is stored.
73
Label ScalarShortcut, AllDone;
74
75
z_iilf(Rmask, 0xFF00FF00);
76
z_iihf(Rmask, 0xFF00FF00);
77
78
#if 0 // Sacrifice shortcuts for code compactness
79
{
80
//---< shortcuts for short strings (very frequent) >---
81
// Strings with 4 and 8 characters were fond to occur very frequently.
82
// Therefore, we handle them right away with minimal overhead.
83
Label skipShortcut, skip4Shortcut, skip8Shortcut;
84
Register Rout = Z_R0;
85
z_chi(Rcnt, 4);
86
z_brne(skip4Shortcut); // 4 characters are very frequent
87
z_lg(Z_R0, 0, Rsrc); // Treat exactly 4 characters specially.
88
if (VM_Version::has_DistinctOpnds()) {
89
Rout = Z_R0;
90
z_ngrk(Rix, Z_R0, Rmask);
91
} else {
92
Rout = Rix;
93
z_lgr(Rix, Z_R0);
94
z_ngr(Z_R0, Rmask);
95
}
96
z_brnz(skipShortcut);
97
z_stcmh(Rout, 5, 0, Rdst);
98
z_stcm(Rout, 5, 2, Rdst);
99
z_lgfr(result, Rcnt);
100
z_bru(AllDone);
101
bind(skip4Shortcut);
102
103
z_chi(Rcnt, 8);
104
z_brne(skip8Shortcut); // There's more to do...
105
z_lmg(Z_R0, Z_R1, 0, Rsrc); // Treat exactly 8 characters specially.
106
if (VM_Version::has_DistinctOpnds()) {
107
Rout = Z_R0;
108
z_ogrk(Rix, Z_R0, Z_R1);
109
z_ngr(Rix, Rmask);
110
} else {
111
Rout = Rix;
112
z_lgr(Rix, Z_R0);
113
z_ogr(Z_R0, Z_R1);
114
z_ngr(Z_R0, Rmask);
115
}
116
z_brnz(skipShortcut);
117
z_stcmh(Rout, 5, 0, Rdst);
118
z_stcm(Rout, 5, 2, Rdst);
119
z_stcmh(Z_R1, 5, 4, Rdst);
120
z_stcm(Z_R1, 5, 6, Rdst);
121
z_lgfr(result, Rcnt);
122
z_bru(AllDone);
123
124
bind(skip8Shortcut);
125
clear_reg(Z_R0, true, false); // #characters already processed (none). Precond for scalar loop.
126
z_brl(ScalarShortcut); // Just a few characters
127
128
bind(skipShortcut);
129
}
130
#endif
131
clear_reg(Z_R0); // make sure register is properly initialized.
132
133
if (VM_Version::has_VectorFacility()) {
134
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
135
// Otherwise just do nothing in vector mode.
136
// Must be multiple of 2*(vector register length in chars (8 HW = 128 bits)).
137
const int log_min_vcnt = exact_log2(min_vcnt);
138
Label VectorLoop, VectorDone, VectorBreak;
139
140
VectorRegister Vtmp1 = Z_V16;
141
VectorRegister Vtmp2 = Z_V17;
142
VectorRegister Vmask = Z_V18;
143
VectorRegister Vzero = Z_V19;
144
VectorRegister Vsrc_first = Z_V20;
145
VectorRegister Vsrc_last = Z_V23;
146
147
assert((Vsrc_last->encoding() - Vsrc_first->encoding() + 1) == min_vcnt/8, "logic error");
148
assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
149
z_srak(Rix, Rcnt, log_min_vcnt); // # vector loop iterations
150
z_brz(VectorDone); // not enough data for vector loop
151
152
z_vzero(Vzero); // all zeroes
153
z_vgmh(Vmask, 0, 7); // generate 0xff00 mask for all 2-byte elements
154
z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
155
156
bind(VectorLoop);
157
z_vlm(Vsrc_first, Vsrc_last, 0, Rsrc);
158
add2reg(Rsrc, min_vcnt*2);
159
160
//---< check for incompatible character >---
161
z_vo(Vtmp1, Z_V20, Z_V21);
162
z_vo(Vtmp2, Z_V22, Z_V23);
163
z_vo(Vtmp1, Vtmp1, Vtmp2);
164
z_vn(Vtmp1, Vtmp1, Vmask);
165
z_vceqhs(Vtmp1, Vtmp1, Vzero); // high half of all chars must be zero for successful compress.
166
z_bvnt(VectorBreak); // break vector loop if not all vector elements compare eq -> incompatible character found.
167
// re-process data from current iteration in break handler.
168
169
//---< pack & store characters >---
170
z_vpkh(Vtmp1, Z_V20, Z_V21); // pack (src1, src2) -> tmp1
171
z_vpkh(Vtmp2, Z_V22, Z_V23); // pack (src3, src4) -> tmp2
172
z_vstm(Vtmp1, Vtmp2, 0, Rdst); // store packed string
173
add2reg(Rdst, min_vcnt);
174
175
z_brct(Rix, VectorLoop);
176
177
z_bru(VectorDone);
178
179
bind(VectorBreak);
180
add2reg(Rsrc, -min_vcnt*2); // Fix Rsrc. Rsrc was already updated, but Rdst and Rix are not.
181
z_sll(Rix, log_min_vcnt); // # chars processed so far in VectorLoop, excl. current iteration.
182
z_sr(Z_R0, Rix); // correct # chars processed in total.
183
184
bind(VectorDone);
185
}
186
187
{
188
const int min_cnt = 8; // Minimum #characters required to use unrolled loop.
189
// Otherwise just do nothing in unrolled loop.
190
// Must be multiple of 8.
191
const int log_min_cnt = exact_log2(min_cnt);
192
Label UnrolledLoop, UnrolledDone, UnrolledBreak;
193
194
if (VM_Version::has_DistinctOpnds()) {
195
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
196
} else {
197
z_lr(Rix, Rcnt);
198
z_sr(Rix, Z_R0);
199
}
200
z_sra(Rix, log_min_cnt); // unrolled loop count
201
z_brz(UnrolledDone);
202
203
bind(UnrolledLoop);
204
z_lmg(Z_R0, Z_R1, 0, Rsrc);
205
if (precise) {
206
z_ogr(Z_R1, Z_R0); // check all 8 chars for incompatibility
207
z_ngr(Z_R1, Rmask);
208
z_brnz(UnrolledBreak);
209
210
z_lg(Z_R1, 8, Rsrc); // reload destroyed register
211
z_stcmh(Z_R0, 5, 0, Rdst);
212
z_stcm(Z_R0, 5, 2, Rdst);
213
} else {
214
z_stcmh(Z_R0, 5, 0, Rdst);
215
z_stcm(Z_R0, 5, 2, Rdst);
216
217
z_ogr(Z_R0, Z_R1);
218
z_ngr(Z_R0, Rmask);
219
z_brnz(UnrolledBreak);
220
}
221
z_stcmh(Z_R1, 5, 4, Rdst);
222
z_stcm(Z_R1, 5, 6, Rdst);
223
224
add2reg(Rsrc, min_cnt*2);
225
add2reg(Rdst, min_cnt);
226
z_brct(Rix, UnrolledLoop);
227
228
z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop.
229
z_nilf(Z_R0, ~(min_cnt-1));
230
z_tmll(Rcnt, min_cnt-1);
231
z_brnaz(ScalarShortcut); // if all bits zero, there is nothing left to do for scalar loop.
232
// Rix == 0 in all cases.
233
z_sllg(Z_R1, Rcnt, 1); // # src bytes already processed. Only lower 32 bits are valid!
234
// Z_R1 contents must be treated as unsigned operand! For huge strings,
235
// (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
236
z_lgfr(result, Rcnt); // all characters processed.
237
z_slgfr(Rdst, Rcnt); // restore ptr
238
z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
239
z_bru(AllDone);
240
241
bind(UnrolledBreak);
242
z_lgfr(Z_R0, Rcnt); // # chars processed in total after unrolled loop
243
z_nilf(Z_R0, ~(min_cnt-1));
244
z_sll(Rix, log_min_cnt); // # chars not yet processed in UnrolledLoop (due to break), broken iteration not included.
245
z_sr(Z_R0, Rix); // fix # chars processed OK so far.
246
if (!precise) {
247
z_lgfr(result, Z_R0);
248
z_sllg(Z_R1, Z_R0, 1); // # src bytes already processed. Only lower 32 bits are valid!
249
// Z_R1 contents must be treated as unsigned operand! For huge strings,
250
// (Rcnt >= 2**30), the value may spill into the sign bit by sllg.
251
z_aghi(result, min_cnt/2); // min_cnt/2 characters have already been written
252
// but ptrs were not updated yet.
253
z_slgfr(Rdst, Z_R0); // restore ptr
254
z_slgfr(Rsrc, Z_R1); // restore ptr, double the element count for Rsrc restore
255
z_bru(AllDone);
256
}
257
bind(UnrolledDone);
258
}
259
260
{
261
Label ScalarLoop, ScalarDone, ScalarBreak;
262
263
bind(ScalarShortcut);
264
z_ltgfr(result, Rcnt);
265
z_brz(AllDone);
266
267
#if 0 // Sacrifice shortcuts for code compactness
268
{
269
//---< Special treatment for very short strings (one or two characters) >---
270
// For these strings, we are sure that the above code was skipped.
271
// Thus, no registers were modified, register restore is not required.
272
Label ScalarDoit, Scalar2Char;
273
z_chi(Rcnt, 2);
274
z_brh(ScalarDoit);
275
z_llh(Z_R1, 0, Z_R0, Rsrc);
276
z_bre(Scalar2Char);
277
z_tmll(Z_R1, 0xff00);
278
z_lghi(result, 0); // cnt == 1, first char invalid, no chars successfully processed
279
z_brnaz(AllDone);
280
z_stc(Z_R1, 0, Z_R0, Rdst);
281
z_lghi(result, 1);
282
z_bru(AllDone);
283
284
bind(Scalar2Char);
285
z_llh(Z_R0, 2, Z_R0, Rsrc);
286
z_tmll(Z_R1, 0xff00);
287
z_lghi(result, 0); // cnt == 2, first char invalid, no chars successfully processed
288
z_brnaz(AllDone);
289
z_stc(Z_R1, 0, Z_R0, Rdst);
290
z_tmll(Z_R0, 0xff00);
291
z_lghi(result, 1); // cnt == 2, second char invalid, one char successfully processed
292
z_brnaz(AllDone);
293
z_stc(Z_R0, 1, Z_R0, Rdst);
294
z_lghi(result, 2);
295
z_bru(AllDone);
296
297
bind(ScalarDoit);
298
}
299
#endif
300
301
if (VM_Version::has_DistinctOpnds()) {
302
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to compress in unrolled loop
303
} else {
304
z_lr(Rix, Rcnt);
305
z_sr(Rix, Z_R0);
306
}
307
z_lgfr(result, Rcnt); // # processed characters (if all runs ok).
308
z_brz(ScalarDone); // uses CC from Rix calculation
309
310
bind(ScalarLoop);
311
z_llh(Z_R1, 0, Z_R0, Rsrc);
312
z_tmll(Z_R1, 0xff00);
313
z_brnaz(ScalarBreak);
314
z_stc(Z_R1, 0, Z_R0, Rdst);
315
add2reg(Rsrc, 2);
316
add2reg(Rdst, 1);
317
z_brct(Rix, ScalarLoop);
318
319
z_bru(ScalarDone);
320
321
bind(ScalarBreak);
322
z_sr(result, Rix);
323
324
bind(ScalarDone);
325
z_sgfr(Rdst, result); // restore ptr
326
z_sgfr(Rsrc, result); // restore ptr, double the element count for Rsrc restore
327
z_sgfr(Rsrc, result);
328
}
329
bind(AllDone);
330
331
if (precise) {
332
BLOCK_COMMENT("} encode_iso_array");
333
} else {
334
BLOCK_COMMENT("} string_compress");
335
}
336
return offset() - block_start;
337
}
338
339
// Inflate byte[] to char[].
340
unsigned int C2_MacroAssembler::string_inflate_trot(Register src, Register dst, Register cnt, Register tmp) {
341
int block_start = offset();
342
343
BLOCK_COMMENT("string_inflate {");
344
345
Register stop_char = Z_R0;
346
Register table = Z_R1;
347
Register src_addr = tmp;
348
349
assert_different_registers(Z_R0, Z_R1, tmp, src, dst, cnt);
350
assert(dst->encoding()%2 == 0, "must be even reg");
351
assert(cnt->encoding()%2 == 1, "must be odd reg");
352
assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair");
353
354
StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT)
355
clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value.
356
lgr_if_needed(src_addr, src);
357
z_llgfr(cnt, cnt); // # src characters, must be a positive simm32.
358
359
translate_ot(dst, src_addr, /* mask = */ 0x0001);
360
361
BLOCK_COMMENT("} string_inflate");
362
363
return offset() - block_start;
364
}
365
366
// Inflate byte[] to char[].
367
// Restores: src, dst
368
// Uses: cnt
369
// Kills: tmp, Z_R0, Z_R1.
370
// Note:
371
// cnt is signed int. Do not rely on high word!
372
// counts # characters, not bytes.
373
unsigned int C2_MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
374
assert_different_registers(Z_R0, Z_R1, src, dst, cnt, tmp);
375
376
BLOCK_COMMENT("string_inflate {");
377
int block_start = offset();
378
379
Register Rcnt = cnt; // # characters (src: bytes, dst: char (2-byte)), remaining after current loop.
380
Register Rix = tmp; // loop index
381
Register Rsrc = src; // addr(src array)
382
Register Rdst = dst; // addr(dst array)
383
Label ScalarShortcut, AllDone;
384
385
#if 0 // Sacrifice shortcuts for code compactness
386
{
387
//---< shortcuts for short strings (very frequent) >---
388
Label skipShortcut, skip4Shortcut;
389
z_ltr(Rcnt, Rcnt); // absolutely nothing to do for strings of len == 0.
390
z_brz(AllDone);
391
clear_reg(Z_R0); // make sure registers are properly initialized.
392
clear_reg(Z_R1);
393
z_chi(Rcnt, 4);
394
z_brne(skip4Shortcut); // 4 characters are very frequent
395
z_icm(Z_R0, 5, 0, Rsrc); // Treat exactly 4 characters specially.
396
z_icm(Z_R1, 5, 2, Rsrc);
397
z_stm(Z_R0, Z_R1, 0, Rdst);
398
z_bru(AllDone);
399
bind(skip4Shortcut);
400
401
z_chi(Rcnt, 8);
402
z_brh(skipShortcut); // There's a lot to do...
403
z_lgfr(Z_R0, Rcnt); // remaining #characters (<= 8). Precond for scalar loop.
404
// This does not destroy the "register cleared" state of Z_R0.
405
z_brl(ScalarShortcut); // Just a few characters
406
z_icmh(Z_R0, 5, 0, Rsrc); // Treat exactly 8 characters specially.
407
z_icmh(Z_R1, 5, 4, Rsrc);
408
z_icm(Z_R0, 5, 2, Rsrc);
409
z_icm(Z_R1, 5, 6, Rsrc);
410
z_stmg(Z_R0, Z_R1, 0, Rdst);
411
z_bru(AllDone);
412
bind(skipShortcut);
413
}
414
#endif
415
clear_reg(Z_R0); // make sure register is properly initialized.
416
417
if (VM_Version::has_VectorFacility()) {
418
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
419
// Otherwise just do nothing in vector mode.
420
// Must be multiple of vector register length (16 bytes = 128 bits).
421
const int log_min_vcnt = exact_log2(min_vcnt);
422
Label VectorLoop, VectorDone;
423
424
assert(VM_Version::has_DistinctOpnds(), "Assumption when has_VectorFacility()");
425
z_srak(Rix, Rcnt, log_min_vcnt); // calculate # vector loop iterations
426
z_brz(VectorDone); // skip if none
427
428
z_sllg(Z_R0, Rix, log_min_vcnt); // remember #chars that will be processed by vector loop
429
430
bind(VectorLoop);
431
z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte)
432
add2reg(Rsrc, min_vcnt);
433
434
z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
435
z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
436
z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
437
z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
438
z_vstm(Z_V22, Z_V25, 0, Rdst); // store next 32 bytes
439
add2reg(Rdst, min_vcnt*2);
440
441
z_brct(Rix, VectorLoop);
442
443
bind(VectorDone);
444
}
445
446
const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.
447
// Otherwise just do nothing in unrolled scalar mode.
448
// Must be multiple of 8.
449
{
450
const int log_min_cnt = exact_log2(min_cnt);
451
Label UnrolledLoop, UnrolledDone;
452
453
454
if (VM_Version::has_DistinctOpnds()) {
455
z_srk(Rix, Rcnt, Z_R0); // remaining # chars to process in unrolled loop
456
} else {
457
z_lr(Rix, Rcnt);
458
z_sr(Rix, Z_R0);
459
}
460
z_sra(Rix, log_min_cnt); // unrolled loop count
461
z_brz(UnrolledDone);
462
463
clear_reg(Z_R0);
464
clear_reg(Z_R1);
465
466
bind(UnrolledLoop);
467
z_icmh(Z_R0, 5, 0, Rsrc);
468
z_icmh(Z_R1, 5, 4, Rsrc);
469
z_icm(Z_R0, 5, 2, Rsrc);
470
z_icm(Z_R1, 5, 6, Rsrc);
471
add2reg(Rsrc, min_cnt);
472
473
z_stmg(Z_R0, Z_R1, 0, Rdst);
474
475
add2reg(Rdst, min_cnt*2);
476
z_brct(Rix, UnrolledLoop);
477
478
bind(UnrolledDone);
479
z_lgfr(Z_R0, Rcnt); // # chars left over after unrolled loop.
480
z_nilf(Z_R0, min_cnt-1);
481
z_brnz(ScalarShortcut); // if zero, there is nothing left to do for scalar loop.
482
// Rix == 0 in all cases.
483
z_sgfr(Z_R0, Rcnt); // negative # characters the ptrs have been advanced previously.
484
z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore.
485
z_agr(Rdst, Z_R0);
486
z_agr(Rsrc, Z_R0); // restore ptr.
487
z_bru(AllDone);
488
}
489
490
{
491
bind(ScalarShortcut);
492
// Z_R0 must contain remaining # characters as 64-bit signed int here.
493
// register contents is preserved over scalar processing (for register fixup).
494
495
#if 0 // Sacrifice shortcuts for code compactness
496
{
497
Label ScalarDefault;
498
z_chi(Rcnt, 2);
499
z_brh(ScalarDefault);
500
z_llc(Z_R0, 0, Z_R0, Rsrc); // 6 bytes
501
z_sth(Z_R0, 0, Z_R0, Rdst); // 4 bytes
502
z_brl(AllDone);
503
z_llc(Z_R0, 1, Z_R0, Rsrc); // 6 bytes
504
z_sth(Z_R0, 2, Z_R0, Rdst); // 4 bytes
505
z_bru(AllDone);
506
bind(ScalarDefault);
507
}
508
#endif
509
510
Label CodeTable;
511
// Some comments on Rix calculation:
512
// - Rcnt is small, therefore no bits shifted out of low word (sll(g) instructions).
513
// - high word of both Rix and Rcnt may contain garbage
514
// - the final lngfr takes care of that garbage, extending the sign to high word
515
z_sllg(Rix, Z_R0, 2); // calculate 10*Rix = (4*Rix + Rix)*2
516
z_ar(Rix, Z_R0);
517
z_larl(Z_R1, CodeTable);
518
z_sll(Rix, 1);
519
z_lngfr(Rix, Rix); // ix range: [0..7], after inversion & mult: [-(7*12)..(0*12)].
520
z_bc(Assembler::bcondAlways, 0, Rix, Z_R1);
521
522
z_llc(Z_R1, 6, Z_R0, Rsrc); // 6 bytes
523
z_sth(Z_R1, 12, Z_R0, Rdst); // 4 bytes
524
525
z_llc(Z_R1, 5, Z_R0, Rsrc);
526
z_sth(Z_R1, 10, Z_R0, Rdst);
527
528
z_llc(Z_R1, 4, Z_R0, Rsrc);
529
z_sth(Z_R1, 8, Z_R0, Rdst);
530
531
z_llc(Z_R1, 3, Z_R0, Rsrc);
532
z_sth(Z_R1, 6, Z_R0, Rdst);
533
534
z_llc(Z_R1, 2, Z_R0, Rsrc);
535
z_sth(Z_R1, 4, Z_R0, Rdst);
536
537
z_llc(Z_R1, 1, Z_R0, Rsrc);
538
z_sth(Z_R1, 2, Z_R0, Rdst);
539
540
z_llc(Z_R1, 0, Z_R0, Rsrc);
541
z_sth(Z_R1, 0, Z_R0, Rdst);
542
bind(CodeTable);
543
544
z_chi(Rcnt, 8); // no fixup for small strings. Rdst, Rsrc were not modified.
545
z_brl(AllDone);
546
547
z_sgfr(Z_R0, Rcnt); // # characters the ptrs have been advanced previously.
548
z_agr(Rdst, Z_R0); // restore ptr, double the element count for Rdst restore.
549
z_agr(Rdst, Z_R0);
550
z_agr(Rsrc, Z_R0); // restore ptr.
551
}
552
bind(AllDone);
553
554
BLOCK_COMMENT("} string_inflate");
555
return offset() - block_start;
556
}
557
558
// Inflate byte[] to char[], length known at compile time.
559
// Restores: src, dst
560
// Kills: tmp, Z_R0, Z_R1.
561
// Note:
562
// len is signed int. Counts # characters, not bytes.
563
unsigned int C2_MacroAssembler::string_inflate_const(Register src, Register dst, Register tmp, int len) {
564
assert_different_registers(Z_R0, Z_R1, src, dst, tmp);
565
566
BLOCK_COMMENT("string_inflate_const {");
567
int block_start = offset();
568
569
Register Rix = tmp; // loop index
570
Register Rsrc = src; // addr(src array)
571
Register Rdst = dst; // addr(dst array)
572
Label ScalarShortcut, AllDone;
573
int nprocessed = 0;
574
int src_off = 0; // compensate for saved (optimized away) ptr advancement.
575
int dst_off = 0; // compensate for saved (optimized away) ptr advancement.
576
bool restore_inputs = false;
577
bool workreg_clear = false;
578
579
if ((len >= 32) && VM_Version::has_VectorFacility()) {
580
const int min_vcnt = 32; // Minimum #characters required to use vector instructions.
581
// Otherwise just do nothing in vector mode.
582
// Must be multiple of vector register length (16 bytes = 128 bits).
583
const int log_min_vcnt = exact_log2(min_vcnt);
584
const int iterations = (len - nprocessed) >> log_min_vcnt;
585
nprocessed += iterations << log_min_vcnt;
586
Label VectorLoop;
587
588
if (iterations == 1) {
589
z_vlm(Z_V20, Z_V21, 0+src_off, Rsrc); // get next 32 characters (single-byte)
590
z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
591
z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
592
z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
593
z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
594
z_vstm(Z_V22, Z_V25, 0+dst_off, Rdst); // store next 32 bytes
595
596
src_off += min_vcnt;
597
dst_off += min_vcnt*2;
598
} else {
599
restore_inputs = true;
600
601
z_lgfi(Rix, len>>log_min_vcnt);
602
bind(VectorLoop);
603
z_vlm(Z_V20, Z_V21, 0, Rsrc); // get next 32 characters (single-byte)
604
add2reg(Rsrc, min_vcnt);
605
606
z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
607
z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
608
z_vuplhb(Z_V24, Z_V21); // V4 <- (expand) V1(high)
609
z_vupllb(Z_V25, Z_V21); // V5 <- (expand) V1(low)
610
z_vstm(Z_V22, Z_V25, 0, Rdst); // store next 32 bytes
611
add2reg(Rdst, min_vcnt*2);
612
613
z_brct(Rix, VectorLoop);
614
}
615
}
616
617
if (((len-nprocessed) >= 16) && VM_Version::has_VectorFacility()) {
618
const int min_vcnt = 16; // Minimum #characters required to use vector instructions.
619
// Otherwise just do nothing in vector mode.
620
// Must be multiple of vector register length (16 bytes = 128 bits).
621
const int log_min_vcnt = exact_log2(min_vcnt);
622
const int iterations = (len - nprocessed) >> log_min_vcnt;
623
nprocessed += iterations << log_min_vcnt;
624
assert(iterations == 1, "must be!");
625
626
z_vl(Z_V20, 0+src_off, Z_R0, Rsrc); // get next 16 characters (single-byte)
627
z_vuplhb(Z_V22, Z_V20); // V2 <- (expand) V0(high)
628
z_vupllb(Z_V23, Z_V20); // V3 <- (expand) V0(low)
629
z_vstm(Z_V22, Z_V23, 0+dst_off, Rdst); // store next 32 bytes
630
631
src_off += min_vcnt;
632
dst_off += min_vcnt*2;
633
}
634
635
if ((len-nprocessed) > 8) {
636
const int min_cnt = 8; // Minimum #characters required to use unrolled scalar loop.
637
// Otherwise just do nothing in unrolled scalar mode.
638
// Must be multiple of 8.
639
const int log_min_cnt = exact_log2(min_cnt);
640
const int iterations = (len - nprocessed) >> log_min_cnt;
641
nprocessed += iterations << log_min_cnt;
642
643
//---< avoid loop overhead/ptr increment for small # iterations >---
644
if (iterations <= 2) {
645
clear_reg(Z_R0);
646
clear_reg(Z_R1);
647
workreg_clear = true;
648
649
z_icmh(Z_R0, 5, 0+src_off, Rsrc);
650
z_icmh(Z_R1, 5, 4+src_off, Rsrc);
651
z_icm(Z_R0, 5, 2+src_off, Rsrc);
652
z_icm(Z_R1, 5, 6+src_off, Rsrc);
653
z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
654
655
src_off += min_cnt;
656
dst_off += min_cnt*2;
657
}
658
659
if (iterations == 2) {
660
z_icmh(Z_R0, 5, 0+src_off, Rsrc);
661
z_icmh(Z_R1, 5, 4+src_off, Rsrc);
662
z_icm(Z_R0, 5, 2+src_off, Rsrc);
663
z_icm(Z_R1, 5, 6+src_off, Rsrc);
664
z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
665
666
src_off += min_cnt;
667
dst_off += min_cnt*2;
668
}
669
670
if (iterations > 2) {
671
Label UnrolledLoop;
672
restore_inputs = true;
673
674
clear_reg(Z_R0);
675
clear_reg(Z_R1);
676
workreg_clear = true;
677
678
z_lgfi(Rix, iterations);
679
bind(UnrolledLoop);
680
z_icmh(Z_R0, 5, 0, Rsrc);
681
z_icmh(Z_R1, 5, 4, Rsrc);
682
z_icm(Z_R0, 5, 2, Rsrc);
683
z_icm(Z_R1, 5, 6, Rsrc);
684
add2reg(Rsrc, min_cnt);
685
686
z_stmg(Z_R0, Z_R1, 0, Rdst);
687
add2reg(Rdst, min_cnt*2);
688
689
z_brct(Rix, UnrolledLoop);
690
}
691
}
692
693
if ((len-nprocessed) > 0) {
694
switch (len-nprocessed) {
695
case 8:
696
if (!workreg_clear) {
697
clear_reg(Z_R0);
698
clear_reg(Z_R1);
699
}
700
z_icmh(Z_R0, 5, 0+src_off, Rsrc);
701
z_icmh(Z_R1, 5, 4+src_off, Rsrc);
702
z_icm(Z_R0, 5, 2+src_off, Rsrc);
703
z_icm(Z_R1, 5, 6+src_off, Rsrc);
704
z_stmg(Z_R0, Z_R1, 0+dst_off, Rdst);
705
break;
706
case 7:
707
if (!workreg_clear) {
708
clear_reg(Z_R0);
709
clear_reg(Z_R1);
710
}
711
clear_reg(Rix);
712
z_icm(Z_R0, 5, 0+src_off, Rsrc);
713
z_icm(Z_R1, 5, 2+src_off, Rsrc);
714
z_icm(Rix, 5, 4+src_off, Rsrc);
715
z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
716
z_llc(Z_R0, 6+src_off, Z_R0, Rsrc);
717
z_st(Rix, 8+dst_off, Z_R0, Rdst);
718
z_sth(Z_R0, 12+dst_off, Z_R0, Rdst);
719
break;
720
case 6:
721
if (!workreg_clear) {
722
clear_reg(Z_R0);
723
clear_reg(Z_R1);
724
}
725
clear_reg(Rix);
726
z_icm(Z_R0, 5, 0+src_off, Rsrc);
727
z_icm(Z_R1, 5, 2+src_off, Rsrc);
728
z_icm(Rix, 5, 4+src_off, Rsrc);
729
z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
730
z_st(Rix, 8+dst_off, Z_R0, Rdst);
731
break;
732
case 5:
733
if (!workreg_clear) {
734
clear_reg(Z_R0);
735
clear_reg(Z_R1);
736
}
737
z_icm(Z_R0, 5, 0+src_off, Rsrc);
738
z_icm(Z_R1, 5, 2+src_off, Rsrc);
739
z_llc(Rix, 4+src_off, Z_R0, Rsrc);
740
z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
741
z_sth(Rix, 8+dst_off, Z_R0, Rdst);
742
break;
743
case 4:
744
if (!workreg_clear) {
745
clear_reg(Z_R0);
746
clear_reg(Z_R1);
747
}
748
z_icm(Z_R0, 5, 0+src_off, Rsrc);
749
z_icm(Z_R1, 5, 2+src_off, Rsrc);
750
z_stm(Z_R0, Z_R1, 0+dst_off, Rdst);
751
break;
752
case 3:
753
if (!workreg_clear) {
754
clear_reg(Z_R0);
755
}
756
z_llc(Z_R1, 2+src_off, Z_R0, Rsrc);
757
z_icm(Z_R0, 5, 0+src_off, Rsrc);
758
z_sth(Z_R1, 4+dst_off, Z_R0, Rdst);
759
z_st(Z_R0, 0+dst_off, Rdst);
760
break;
761
case 2:
762
z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
763
z_llc(Z_R1, 1+src_off, Z_R0, Rsrc);
764
z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
765
z_sth(Z_R1, 2+dst_off, Z_R0, Rdst);
766
break;
767
case 1:
768
z_llc(Z_R0, 0+src_off, Z_R0, Rsrc);
769
z_sth(Z_R0, 0+dst_off, Z_R0, Rdst);
770
break;
771
default:
772
guarantee(false, "Impossible");
773
break;
774
}
775
src_off += len-nprocessed;
776
dst_off += (len-nprocessed)*2;
777
nprocessed = len;
778
}
779
780
//---< restore modified input registers >---
781
if ((nprocessed > 0) && restore_inputs) {
782
z_agfi(Rsrc, -(nprocessed-src_off));
783
if (nprocessed < 1000000000) { // avoid int overflow
784
z_agfi(Rdst, -(nprocessed*2-dst_off));
785
} else {
786
z_agfi(Rdst, -(nprocessed-dst_off));
787
z_agfi(Rdst, -nprocessed);
788
}
789
}
790
791
BLOCK_COMMENT("} string_inflate_const");
792
return offset() - block_start;
793
}
794
795
// Kills src.
796
unsigned int C2_MacroAssembler::has_negatives(Register result, Register src, Register cnt,
797
Register odd_reg, Register even_reg, Register tmp) {
798
int block_start = offset();
799
Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone;
800
const Register addr = src, mask = tmp;
801
802
BLOCK_COMMENT("has_negatives {");
803
804
z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.)
805
z_llilf(mask, 0x80808080);
806
z_lhi(result, 1); // Assume true.
807
// Last possible addr for fast loop.
808
z_lay(odd_reg, -16, Z_R1, src);
809
z_chi(cnt, 16);
810
z_brl(Lslow);
811
812
// ind1: index, even_reg: index increment, odd_reg: index limit
813
z_iihf(mask, 0x80808080);
814
z_lghi(even_reg, 16);
815
816
bind(Lloop1); // 16 bytes per iteration.
817
z_lg(Z_R0, Address(addr));
818
z_lg(Z_R1, Address(addr, 8));
819
z_ogr(Z_R0, Z_R1);
820
z_ngr(Z_R0, mask);
821
z_brne(Ldone); // If found return 1.
822
z_brxlg(addr, even_reg, Lloop1);
823
824
bind(Lslow);
825
z_aghi(odd_reg, 16-1); // Last possible addr for slow loop.
826
z_lghi(even_reg, 1);
827
z_cgr(addr, odd_reg);
828
z_brh(Lnotfound);
829
830
bind(Lloop2); // 1 byte per iteration.
831
z_cli(Address(addr), 0x80);
832
z_brnl(Ldone); // If found return 1.
833
z_brxlg(addr, even_reg, Lloop2);
834
835
bind(Lnotfound);
836
z_lhi(result, 0);
837
838
bind(Ldone);
839
840
BLOCK_COMMENT("} has_negatives");
841
842
return offset() - block_start;
843
}
844
845
// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result
846
unsigned int C2_MacroAssembler::string_compare(Register str1, Register str2,
847
Register cnt1, Register cnt2,
848
Register odd_reg, Register even_reg, Register result, int ae) {
849
int block_start = offset();
850
851
assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result);
852
assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result);
853
854
// If strings are equal up to min length, return the length difference.
855
const Register diff = result, // Pre-set result with length difference.
856
min = cnt1, // min number of bytes
857
tmp = cnt2;
858
859
// Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
860
// we interchange str1 and str2 in the UL case and negate the result.
861
// Like this, str1 is always latin1 encoded, except for the UU case.
862
// In addition, we need 0 (or sign which is 0) extend when using 64 bit register.
863
const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL);
864
865
BLOCK_COMMENT("string_compare {");
866
867
if (used_as_LU) {
868
z_srl(cnt2, 1);
869
}
870
871
// See if the lengths are different, and calculate min in cnt1.
872
// Save diff in case we need it for a tie-breaker.
873
874
// diff = cnt1 - cnt2
875
if (VM_Version::has_DistinctOpnds()) {
876
z_srk(diff, cnt1, cnt2);
877
} else {
878
z_lr(diff, cnt1);
879
z_sr(diff, cnt2);
880
}
881
if (str1 != str2) {
882
if (VM_Version::has_LoadStoreConditional()) {
883
z_locr(min, cnt2, Assembler::bcondHigh);
884
} else {
885
Label Lskip;
886
z_brl(Lskip); // min ok if cnt1 < cnt2
887
z_lr(min, cnt2); // min = cnt2
888
bind(Lskip);
889
}
890
}
891
892
if (ae == StrIntrinsicNode::UU) {
893
z_sra(diff, 1);
894
}
895
if (str1 != str2) {
896
Label Ldone;
897
if (used_as_LU) {
898
// Loop which searches the first difference character by character.
899
Label Lloop;
900
const Register ind1 = Z_R1,
901
ind2 = min;
902
int stride1 = 1, stride2 = 2; // See comment above.
903
904
// ind1: index, even_reg: index increment, odd_reg: index limit
905
z_llilf(ind1, (unsigned int)(-stride1));
906
z_lhi(even_reg, stride1);
907
add2reg(odd_reg, -stride1, min);
908
clear_reg(ind2); // kills min
909
910
bind(Lloop);
911
z_brxh(ind1, even_reg, Ldone);
912
z_llc(tmp, Address(str1, ind1));
913
z_llh(Z_R0, Address(str2, ind2));
914
z_ahi(ind2, stride2);
915
z_sr(tmp, Z_R0);
916
z_bre(Lloop);
917
918
z_lr(result, tmp);
919
920
} else {
921
// Use clcle in fast loop (only for same encoding).
922
z_lgr(Z_R0, str1);
923
z_lgr(even_reg, str2);
924
z_llgfr(Z_R1, min);
925
z_llgfr(odd_reg, min);
926
927
if (ae == StrIntrinsicNode::LL) {
928
compare_long_ext(Z_R0, even_reg, 0);
929
} else {
930
compare_long_uni(Z_R0, even_reg, 0);
931
}
932
z_bre(Ldone);
933
z_lgr(Z_R1, Z_R0);
934
if (ae == StrIntrinsicNode::LL) {
935
z_llc(Z_R0, Address(even_reg));
936
z_llc(result, Address(Z_R1));
937
} else {
938
z_llh(Z_R0, Address(even_reg));
939
z_llh(result, Address(Z_R1));
940
}
941
z_sr(result, Z_R0);
942
}
943
944
// Otherwise, return the difference between the first mismatched chars.
945
bind(Ldone);
946
}
947
948
if (ae == StrIntrinsicNode::UL) {
949
z_lcr(result, result); // Negate result (see note above).
950
}
951
952
BLOCK_COMMENT("} string_compare");
953
954
return offset() - block_start;
955
}
956
957
unsigned int C2_MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit,
958
Register odd_reg, Register even_reg, Register result, bool is_byte) {
959
int block_start = offset();
960
961
BLOCK_COMMENT("array_equals {");
962
963
assert_different_registers(ary1, limit, odd_reg, even_reg);
964
assert_different_registers(ary2, limit, odd_reg, even_reg);
965
966
Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template;
967
int base_offset = 0;
968
969
if (ary1 != ary2) {
970
if (is_array_equ) {
971
base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
972
973
// Return true if the same array.
974
compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true);
975
976
// Return false if one of them is NULL.
977
compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
978
compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false);
979
980
// Load the lengths of arrays.
981
z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes()));
982
983
// Return false if the two arrays are not equal length.
984
z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes()));
985
z_brne(Ldone_false);
986
987
// string len in bytes (right operand)
988
if (!is_byte) {
989
z_chi(odd_reg, 128);
990
z_sll(odd_reg, 1); // preserves flags
991
z_brh(Lclcle);
992
} else {
993
compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle);
994
}
995
} else {
996
z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value.
997
compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle);
998
}
999
1000
1001
// Use clc instruction for up to 256 bytes.
1002
{
1003
Register str1_reg = ary1,
1004
str2_reg = ary2;
1005
if (is_array_equ) {
1006
str1_reg = Z_R1;
1007
str2_reg = even_reg;
1008
add2reg(str1_reg, base_offset, ary1); // string addr (left operand)
1009
add2reg(str2_reg, base_offset, ary2); // string addr (right operand)
1010
}
1011
z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0.
1012
z_brl(Ldone_true);
1013
// Note: We could jump to the template if equal.
1014
1015
assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
1016
z_exrl(odd_reg, CLC_template);
1017
z_bre(Ldone_true);
1018
// fall through
1019
1020
bind(Ldone_false);
1021
clear_reg(result);
1022
z_bru(Ldone);
1023
1024
bind(CLC_template);
1025
z_clc(0, 0, str1_reg, 0, str2_reg);
1026
}
1027
1028
// Use clcle instruction.
1029
{
1030
bind(Lclcle);
1031
add2reg(even_reg, base_offset, ary2); // string addr (right operand)
1032
add2reg(Z_R0, base_offset, ary1); // string addr (left operand)
1033
1034
z_lgr(Z_R1, odd_reg); // string len in bytes (left operand)
1035
if (is_byte) {
1036
compare_long_ext(Z_R0, even_reg, 0);
1037
} else {
1038
compare_long_uni(Z_R0, even_reg, 0);
1039
}
1040
z_lghi(result, 0); // Preserve flags.
1041
z_brne(Ldone);
1042
}
1043
}
1044
// fall through
1045
1046
bind(Ldone_true);
1047
z_lghi(result, 1); // All characters are equal.
1048
bind(Ldone);
1049
1050
BLOCK_COMMENT("} array_equals");
1051
1052
return offset() - block_start;
1053
}
1054
1055
// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result
1056
unsigned int C2_MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
1057
Register needle, Register needlecnt, int needlecntval,
1058
Register odd_reg, Register even_reg, int ae) {
1059
int block_start = offset();
1060
1061
// Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
1062
assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
1063
const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
1064
const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
1065
Label L_needle1, L_Found, L_NotFound;
1066
1067
BLOCK_COMMENT("string_indexof {");
1068
1069
if (needle == haystack) {
1070
z_lhi(result, 0);
1071
} else {
1072
1073
// Load first character of needle (R0 used by search_string instructions).
1074
if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); }
1075
1076
// Compute last haystack addr to use if no match gets found.
1077
if (needlecnt != noreg) { // variable needlecnt
1078
z_ahi(needlecnt, -1); // Remaining characters after first one.
1079
z_sr(haycnt, needlecnt); // Compute index succeeding last element to compare.
1080
if (n_csize == 2) { z_sll(needlecnt, 1); } // In bytes.
1081
} else { // constant needlecnt
1082
assert((needlecntval & 0x7fff) == needlecntval, "must be positive simm16 immediate");
1083
// Compute index succeeding last element to compare.
1084
if (needlecntval != 1) { z_ahi(haycnt, 1 - needlecntval); }
1085
}
1086
1087
z_llgfr(haycnt, haycnt); // Clear high half.
1088
z_lgr(result, haystack); // Final result will be computed from needle start pointer.
1089
if (h_csize == 2) { z_sll(haycnt, 1); } // Scale to number of bytes.
1090
z_agr(haycnt, haystack); // Point to address succeeding last element (haystack+scale*(haycnt-needlecnt+1)).
1091
1092
if (h_csize != n_csize) {
1093
assert(ae == StrIntrinsicNode::UL, "Invalid encoding");
1094
1095
if (needlecnt != noreg || needlecntval != 1) {
1096
if (needlecnt != noreg) {
1097
compare32_and_branch(needlecnt, (intptr_t)0, Assembler::bcondEqual, L_needle1);
1098
}
1099
1100
// Main Loop: UL version (now we have at least 2 characters).
1101
Label L_OuterLoop, L_InnerLoop, L_Skip;
1102
bind(L_OuterLoop); // Search for 1st 2 characters.
1103
z_lgr(Z_R1, haycnt);
1104
MacroAssembler::search_string_uni(Z_R1, result);
1105
z_brc(Assembler::bcondNotFound, L_NotFound);
1106
z_lgr(result, Z_R1);
1107
1108
z_lghi(Z_R1, n_csize);
1109
z_lghi(even_reg, h_csize);
1110
bind(L_InnerLoop);
1111
z_llgc(odd_reg, Address(needle, Z_R1));
1112
z_ch(odd_reg, Address(result, even_reg));
1113
z_brne(L_Skip);
1114
if (needlecnt != noreg) { z_cr(Z_R1, needlecnt); } else { z_chi(Z_R1, needlecntval - 1); }
1115
z_brnl(L_Found);
1116
z_aghi(Z_R1, n_csize);
1117
z_aghi(even_reg, h_csize);
1118
z_bru(L_InnerLoop);
1119
1120
bind(L_Skip);
1121
z_aghi(result, h_csize); // This is the new address we want to use for comparing.
1122
z_bru(L_OuterLoop);
1123
}
1124
1125
} else {
1126
const intptr_t needle_bytes = (n_csize == 2) ? ((needlecntval - 1) << 1) : (needlecntval - 1);
1127
Label L_clcle;
1128
1129
if (needlecnt != noreg || (needlecntval != 1 && needle_bytes <= 256)) {
1130
if (needlecnt != noreg) {
1131
compare32_and_branch(needlecnt, 256, Assembler::bcondHigh, L_clcle);
1132
z_ahi(needlecnt, -1); // remaining bytes -1 (for CLC)
1133
z_brl(L_needle1);
1134
}
1135
1136
// Main Loop: clc version (now we have at least 2 characters).
1137
Label L_OuterLoop, CLC_template;
1138
bind(L_OuterLoop); // Search for 1st 2 characters.
1139
z_lgr(Z_R1, haycnt);
1140
if (h_csize == 1) {
1141
MacroAssembler::search_string(Z_R1, result);
1142
} else {
1143
MacroAssembler::search_string_uni(Z_R1, result);
1144
}
1145
z_brc(Assembler::bcondNotFound, L_NotFound);
1146
z_lgr(result, Z_R1);
1147
1148
if (needlecnt != noreg) {
1149
assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware");
1150
z_exrl(needlecnt, CLC_template);
1151
} else {
1152
z_clc(h_csize, needle_bytes -1, Z_R1, n_csize, needle);
1153
}
1154
z_bre(L_Found);
1155
z_aghi(result, h_csize); // This is the new address we want to use for comparing.
1156
z_bru(L_OuterLoop);
1157
1158
if (needlecnt != noreg) {
1159
bind(CLC_template);
1160
z_clc(h_csize, 0, Z_R1, n_csize, needle);
1161
}
1162
}
1163
1164
if (needlecnt != noreg || needle_bytes > 256) {
1165
bind(L_clcle);
1166
1167
// Main Loop: clcle version (now we have at least 256 bytes).
1168
Label L_OuterLoop, CLC_template;
1169
bind(L_OuterLoop); // Search for 1st 2 characters.
1170
z_lgr(Z_R1, haycnt);
1171
if (h_csize == 1) {
1172
MacroAssembler::search_string(Z_R1, result);
1173
} else {
1174
MacroAssembler::search_string_uni(Z_R1, result);
1175
}
1176
z_brc(Assembler::bcondNotFound, L_NotFound);
1177
1178
add2reg(Z_R0, n_csize, needle);
1179
add2reg(even_reg, h_csize, Z_R1);
1180
z_lgr(result, Z_R1);
1181
if (needlecnt != noreg) {
1182
z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand)
1183
z_llgfr(odd_reg, needlecnt);
1184
} else {
1185
load_const_optimized(Z_R1, needle_bytes);
1186
if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); }
1187
}
1188
if (h_csize == 1) {
1189
compare_long_ext(Z_R0, even_reg, 0);
1190
} else {
1191
compare_long_uni(Z_R0, even_reg, 0);
1192
}
1193
z_bre(L_Found);
1194
1195
if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload.
1196
z_aghi(result, h_csize); // This is the new address we want to use for comparing.
1197
z_bru(L_OuterLoop);
1198
}
1199
}
1200
1201
if (needlecnt != noreg || needlecntval == 1) {
1202
bind(L_needle1);
1203
1204
// Single needle character version.
1205
if (h_csize == 1) {
1206
MacroAssembler::search_string(haycnt, result);
1207
} else {
1208
MacroAssembler::search_string_uni(haycnt, result);
1209
}
1210
z_lgr(result, haycnt);
1211
z_brc(Assembler::bcondFound, L_Found);
1212
}
1213
1214
bind(L_NotFound);
1215
add2reg(result, -1, haystack); // Return -1.
1216
1217
bind(L_Found); // Return index (or -1 in fallthrough case).
1218
z_sgr(result, haystack);
1219
if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); }
1220
}
1221
BLOCK_COMMENT("} string_indexof");
1222
1223
return offset() - block_start;
1224
}
1225
1226
// early clobber: result
1227
unsigned int C2_MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
1228
Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) {
1229
int block_start = offset();
1230
1231
BLOCK_COMMENT("string_indexof_char {");
1232
1233
if (needle == haystack) {
1234
z_lhi(result, 0);
1235
} else {
1236
1237
Label Ldone;
1238
1239
z_llgfr(odd_reg, haycnt); // Preset loop ctr/searchrange end.
1240
if (needle == noreg) {
1241
load_const_optimized(Z_R0, (unsigned long)needleChar);
1242
} else {
1243
if (is_byte) {
1244
z_llgcr(Z_R0, needle); // First (and only) needle char.
1245
} else {
1246
z_llghr(Z_R0, needle); // First (and only) needle char.
1247
}
1248
}
1249
1250
if (!is_byte) {
1251
z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU.
1252
}
1253
1254
z_lgr(even_reg, haystack); // haystack addr
1255
z_agr(odd_reg, haystack); // First char after range end.
1256
z_lghi(result, -1);
1257
1258
if (is_byte) {
1259
MacroAssembler::search_string(odd_reg, even_reg);
1260
} else {
1261
MacroAssembler::search_string_uni(odd_reg, even_reg);
1262
}
1263
z_brc(Assembler::bcondNotFound, Ldone);
1264
if (is_byte) {
1265
if (VM_Version::has_DistinctOpnds()) {
1266
z_sgrk(result, odd_reg, haystack);
1267
} else {
1268
z_sgr(odd_reg, haystack);
1269
z_lgr(result, odd_reg);
1270
}
1271
} else {
1272
z_slgr(odd_reg, haystack);
1273
z_srlg(result, odd_reg, exact_log2(sizeof(jchar)));
1274
}
1275
1276
bind(Ldone);
1277
}
1278
BLOCK_COMMENT("} string_indexof_char");
1279
1280
return offset() - block_start;
1281
}
1282
1283
1284