GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/cpu/s390/macroAssembler_s390.cpp
/*
 * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/codeBuffer.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/icache.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/events.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

#include <ucontext.h>

#define BLOCK_COMMENT(str) block_comment(str)
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Move 32-bit register if destination and source are different.
61
void MacroAssembler::lr_if_needed(Register rd, Register rs) {
62
if (rs != rd) { z_lr(rd, rs); }
63
}
64
65
// Move register if destination and source are different.
66
void MacroAssembler::lgr_if_needed(Register rd, Register rs) {
67
if (rs != rd) { z_lgr(rd, rs); }
68
}
69
70
// Zero-extend 32-bit register into 64-bit register if destination and source are different.
71
void MacroAssembler::llgfr_if_needed(Register rd, Register rs) {
72
if (rs != rd) { z_llgfr(rd, rs); }
73
}
74
75
// Move float register if destination and source are different.
76
void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) {
77
if (rs != rd) { z_ldr(rd, rs); }
78
}
79
80
// Move integer register if destination and source are different.
81
// It is assumed that shorter-than-int types are already
82
// appropriately sign-extended.
83
void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src,
84
BasicType src_type) {
85
assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types");
86
assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types");
87
88
if (dst_type == src_type) {
89
lgr_if_needed(dst, src); // Just move all 64 bits.
90
return;
91
}
92
93
switch (dst_type) {
94
// Do not support these types for now.
95
// case T_BOOLEAN:
96
case T_BYTE: // signed byte
97
switch (src_type) {
98
case T_INT:
99
z_lgbr(dst, src);
100
break;
101
default:
102
ShouldNotReachHere();
103
}
104
return;
105
106
case T_CHAR:
107
case T_SHORT:
108
switch (src_type) {
109
case T_INT:
110
if (dst_type == T_CHAR) {
111
z_llghr(dst, src);
112
} else {
113
z_lghr(dst, src);
114
}
115
break;
116
default:
117
ShouldNotReachHere();
118
}
119
return;
120
121
case T_INT:
122
switch (src_type) {
123
case T_BOOLEAN:
124
case T_BYTE:
125
case T_CHAR:
126
case T_SHORT:
127
case T_INT:
128
case T_LONG:
129
case T_OBJECT:
130
case T_ARRAY:
131
case T_VOID:
132
case T_ADDRESS:
133
lr_if_needed(dst, src);
134
// llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug).
135
return;
136
137
default:
138
assert(false, "non-integer src type");
139
return;
140
}
141
case T_LONG:
142
switch (src_type) {
143
case T_BOOLEAN:
144
case T_BYTE:
145
case T_CHAR:
146
case T_SHORT:
147
case T_INT:
148
z_lgfr(dst, src); // sign extension
149
return;
150
151
case T_LONG:
152
case T_OBJECT:
153
case T_ARRAY:
154
case T_VOID:
155
case T_ADDRESS:
156
lgr_if_needed(dst, src);
157
return;
158
159
default:
160
assert(false, "non-integer src type");
161
return;
162
}
163
return;
164
case T_OBJECT:
165
case T_ARRAY:
166
case T_VOID:
167
case T_ADDRESS:
168
switch (src_type) {
169
// These types don't make sense to be converted to pointers:
170
// case T_BOOLEAN:
171
// case T_BYTE:
172
// case T_CHAR:
173
// case T_SHORT:
174
175
case T_INT:
176
z_llgfr(dst, src); // zero extension
177
return;
178
179
case T_LONG:
180
case T_OBJECT:
181
case T_ARRAY:
182
case T_VOID:
183
case T_ADDRESS:
184
lgr_if_needed(dst, src);
185
return;
186
187
default:
188
assert(false, "non-integer src type");
189
return;
190
}
191
return;
192
default:
193
assert(false, "non-integer dst type");
194
return;
195
}
196
}
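// Illustrative usage (not part of the original source; register choices are arbitrary):
//   move_reg_if_needed(Z_R2, T_LONG, Z_R3, T_INT);   // emits z_lgfr: sign-extend int to long
//   move_reg_if_needed(Z_R2, T_CHAR, Z_R3, T_INT);   // emits z_llghr: zero-extend to 16 bits
//   move_reg_if_needed(Z_R2, T_INT,  Z_R2, T_INT);   // same type and register: no code emitted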
197
198
// Move float register if destination and source are different.
199
void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type,
200
FloatRegister src, BasicType src_type) {
201
assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types");
202
assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types");
203
if (dst_type == src_type) {
204
ldr_if_needed(dst, src); // Just move all 64 bits.
205
} else {
206
switch (dst_type) {
207
case T_FLOAT:
208
assert(src_type == T_DOUBLE, "invalid float type combination");
209
z_ledbr(dst, src);
210
return;
211
case T_DOUBLE:
212
assert(src_type == T_FLOAT, "invalid float type combination");
213
z_ldebr(dst, src);
214
return;
215
default:
216
assert(false, "non-float dst type");
217
return;
218
}
219
}
220
}
221
222
// Optimized emitter for reg to mem operations.
223
// Uses modern instructions if running on modern hardware, classic instructions
224
// otherwise. Prefers (usually shorter) classic instructions if applicable.
225
// Data register (reg) cannot be used as work register.
226
//
227
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
228
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
229
void MacroAssembler::freg2mem_opt(FloatRegister reg,
230
int64_t disp,
231
Register index,
232
Register base,
233
void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
234
void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
235
Register scratch) {
236
index = (index == noreg) ? Z_R0 : index;
237
if (Displacement::is_shortDisp(disp)) {
238
(this->*classic)(reg, disp, index, base);
239
} else {
240
if (Displacement::is_validDisp(disp)) {
241
(this->*modern)(reg, disp, index, base);
242
} else {
243
if (scratch != Z_R0 && scratch != Z_R1) {
244
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
245
} else {
246
if (scratch != Z_R0) { // scratch == Z_R1
247
if ((scratch == index) || (index == base)) {
248
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
249
} else {
250
add2reg(scratch, disp, base);
251
(this->*classic)(reg, 0, index, scratch);
252
if (base == scratch) {
253
add2reg(base, -disp); // Restore base.
254
}
255
}
256
} else { // scratch == Z_R0
257
z_lgr(scratch, base);
258
add2reg(base, disp);
259
(this->*classic)(reg, 0, index, base);
260
z_lgr(base, scratch); // Restore base.
261
}
262
}
263
}
264
}
265
}
266
267
void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) {
268
if (is_double) {
269
freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std));
270
} else {
271
freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste));
272
}
273
}
274
275
// Optimized emitter for mem to reg operations.
276
// Uses modern instructions if running on modern hardware, classic instructions
277
// otherwise. Prefers (usually shorter) classic instructions if applicable.
278
// data register (reg) cannot be used as work register.
279
//
280
// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
281
// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
282
void MacroAssembler::mem2freg_opt(FloatRegister reg,
283
int64_t disp,
284
Register index,
285
Register base,
286
void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
287
void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
288
Register scratch) {
289
index = (index == noreg) ? Z_R0 : index;
290
if (Displacement::is_shortDisp(disp)) {
291
(this->*classic)(reg, disp, index, base);
292
} else {
293
if (Displacement::is_validDisp(disp)) {
294
(this->*modern)(reg, disp, index, base);
295
} else {
296
if (scratch != Z_R0 && scratch != Z_R1) {
297
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
298
} else {
299
if (scratch != Z_R0) { // scratch == Z_R1
300
if ((scratch == index) || (index == base)) {
301
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
302
} else {
303
add2reg(scratch, disp, base);
304
(this->*classic)(reg, 0, index, scratch);
305
if (base == scratch) {
306
add2reg(base, -disp); // Restore base.
307
}
308
}
309
} else { // scratch == Z_R0
310
z_lgr(scratch, base);
311
add2reg(base, disp);
312
(this->*classic)(reg, 0, index, base);
313
z_lgr(base, scratch); // Restore base.
314
}
315
}
316
}
317
}
318
}
319
320
void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
321
if (is_double) {
322
mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
323
} else {
324
mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
325
}
326
}
327
328
// Optimized emitter for reg to mem operations.
329
// Uses modern instructions if running on modern hardware, classic instructions
330
// otherwise. Prefers (usually shorter) classic instructions if applicable.
331
// Data register (reg) cannot be used as work register.
332
//
333
// Don't rely on register locking, instead pass a scratch register
334
// (Z_R0 by default)
335
// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
336
void MacroAssembler::reg2mem_opt(Register reg,
337
int64_t disp,
338
Register index,
339
Register base,
340
void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
341
void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
342
Register scratch) {
343
index = (index == noreg) ? Z_R0 : index;
344
if (Displacement::is_shortDisp(disp)) {
345
(this->*classic)(reg, disp, index, base);
346
} else {
347
if (Displacement::is_validDisp(disp)) {
348
(this->*modern)(reg, disp, index, base);
349
} else {
350
if (scratch != Z_R0 && scratch != Z_R1) {
351
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
352
} else {
353
if (scratch != Z_R0) { // scratch == Z_R1
354
if ((scratch == index) || (index == base)) {
355
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
356
} else {
357
add2reg(scratch, disp, base);
358
(this->*classic)(reg, 0, index, scratch);
359
if (base == scratch) {
360
add2reg(base, -disp); // Restore base.
361
}
362
}
363
} else { // scratch == Z_R0
364
if ((scratch == reg) || (scratch == base) || (reg == base)) {
365
(this->*modern)(reg, disp, index, base); // Will fail with disp out of range.
366
} else {
367
z_lgr(scratch, base);
368
add2reg(base, disp);
369
(this->*classic)(reg, 0, index, base);
370
z_lgr(base, scratch); // Restore base.
371
}
372
}
373
}
374
}
375
}
376
}
377
378
int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
379
int store_offset = offset();
380
if (is_double) {
381
reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
382
} else {
383
reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
384
}
385
return store_offset;
386
}
387
388
// Optimized emitter for mem to reg operations.
389
// Uses modern instructions if running on modern hardware, classic instructions
390
// otherwise. Prefers (usually shorter) classic instructions if applicable.
391
// Data register (reg) will be used as work register where possible.
392
void MacroAssembler::mem2reg_opt(Register reg,
393
int64_t disp,
394
Register index,
395
Register base,
396
void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
397
void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
398
index = (index == noreg) ? Z_R0 : index;
399
if (Displacement::is_shortDisp(disp)) {
400
(this->*classic)(reg, disp, index, base);
401
} else {
402
if (Displacement::is_validDisp(disp)) {
403
(this->*modern)(reg, disp, index, base);
404
} else {
405
if ((reg == index) && (reg == base)) {
406
z_sllg(reg, reg, 1);
407
add2reg(reg, disp);
408
(this->*classic)(reg, 0, noreg, reg);
409
} else if ((reg == index) && (reg != Z_R0)) {
410
add2reg(reg, disp);
411
(this->*classic)(reg, 0, reg, base);
412
} else if (reg == base) {
413
add2reg(reg, disp);
414
(this->*classic)(reg, 0, index, reg);
415
} else if (reg != Z_R0) {
416
add2reg(reg, disp, base);
417
(this->*classic)(reg, 0, index, reg);
418
} else { // reg == Z_R0 && reg != base here
419
add2reg(base, disp);
420
(this->*classic)(reg, 0, index, base);
421
add2reg(base, -disp);
422
}
423
}
424
}
425
}
426
427
void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
428
if (is_double) {
429
z_lg(reg, a);
430
} else {
431
mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
432
}
433
}
434
435
void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
436
mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
437
}
438
439
void MacroAssembler::and_imm(Register r, long mask,
440
Register tmp /* = Z_R0 */,
441
bool wide /* = false */) {
442
assert(wide || Immediate::is_simm32(mask), "mask value too large");
443
444
if (!wide) {
445
z_nilf(r, mask);
446
return;
447
}
448
449
assert(r != tmp, " need a different temporary register !");
450
load_const_optimized(tmp, mask);
451
z_ngr(r, tmp);
452
}
453
454
// Calculate the 1's complement.
455
// Note: The condition code is neither preserved nor correctly set by this code!!!
456
// Note: (wide == false) does not protect the high order half of the target register
457
// from alteration. It only serves as optimization hint for 32-bit results.
458
void MacroAssembler::not_(Register r1, Register r2, bool wide) {
459
460
if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
461
z_xilf(r1, -1);
462
if (wide) {
463
z_xihf(r1, -1);
464
}
465
} else { // Distinct src and dst registers.
466
load_const_optimized(r1, -1);
467
z_xgr(r1, r2);
468
}
469
}
470
471
unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
472
assert(lBitPos >= 0, "zero is leftmost bit position");
473
assert(rBitPos <= 63, "63 is rightmost bit position");
474
assert(lBitPos <= rBitPos, "inverted selection interval");
475
return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
476
}
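// Worked examples (illustrative only, not from the original source). Bit 0 is the
// leftmost (most significant) bit and bit 63 the rightmost, as the asserts above state:
//   create_mask(48, 63) == 0x000000000000FFFFUL   // low halfword selected
//   create_mask( 0, 31) == 0xFFFFFFFF00000000UL   // high word selected
//   create_mask(63, 63) == 0x0000000000000001UL   // single rightmost bit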
477
478
// Helper function for the "Rotate_then_<logicalOP>" emitters.
479
// Rotate src, then mask register contents such that only bits in range survive.
480
// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
481
// For oneBits == true, all bits not in range are set to 1. Useful for preserving all bits outside range.
482
// The caller must ensure that the selected range only contains bits with defined value.
483
void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
484
int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
485
assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
486
bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
487
bool srl4rll = (nRotate < 0) && (-nRotate <= lBitPos); // Substitute SRL(G) for RLL(G).
488
// Pre-determine which parts of dst will be zero after shift/rotate.
489
bool llZero = sll4rll && (nRotate >= 16);
490
bool lhZero = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
491
bool lfZero = llZero && lhZero;
492
bool hlZero = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
493
bool hhZero = (srl4rll && (nRotate <= -16));
494
bool hfZero = hlZero && hhZero;
495
496
// rotate then mask src operand.
497
// if oneBits == true, all bits outside selected range are 1s.
498
// if oneBits == false, all bits outside selected range are 0s.
499
if (src32bit) { // There might be garbage in the upper 32 bits which will get masked away.
500
if (dst32bit) {
501
z_rll(dst, src, nRotate); // Copy and rotate, upper half of reg remains undisturbed.
502
} else {
503
if (sll4rll) { z_sllg(dst, src, nRotate); }
504
else if (srl4rll) { z_srlg(dst, src, -nRotate); }
505
else { z_rllg(dst, src, nRotate); }
506
}
507
} else {
508
if (sll4rll) { z_sllg(dst, src, nRotate); }
509
else if (srl4rll) { z_srlg(dst, src, -nRotate); }
510
else { z_rllg(dst, src, nRotate); }
511
}
512
513
unsigned long range_mask = create_mask(lBitPos, rBitPos);
514
unsigned int range_mask_h = (unsigned int)(range_mask >> 32);
515
unsigned int range_mask_l = (unsigned int)range_mask;
516
unsigned short range_mask_hh = (unsigned short)(range_mask >> 48);
517
unsigned short range_mask_hl = (unsigned short)(range_mask >> 32);
518
unsigned short range_mask_lh = (unsigned short)(range_mask >> 16);
519
unsigned short range_mask_ll = (unsigned short)range_mask;
520
// Works for z9 and newer H/W.
521
if (oneBits) {
522
if ((~range_mask_l) != 0) { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
523
if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
524
} else {
525
// All bits outside range become 0s
526
if (((~range_mask_l) != 0) && !lfZero) {
527
z_nilf(dst, range_mask_l);
528
}
529
if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
530
z_nihf(dst, range_mask_h);
531
}
532
}
533
}
534
535
// Rotate src, then insert selected range from rotated src into dst.
536
// Clear dst before, if requested.
537
void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
538
int nRotate, bool clear_dst) {
539
// This version does not depend on src being zero-extended int2long.
540
nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
541
z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
542
}
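// Illustrative example (not from the original source): extract the low 16 bits of
// Z_R3 into Z_R2 and zero the rest of Z_R2 (no rotation, clear_dst requested):
//   rotate_then_insert(Z_R2, Z_R3, 48, 63, 0, true);   // Z_R2 = Z_R3 & 0xFFFF
// With clear_dst == false, bits of Z_R2 outside the selected range are preserved.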
543
544
// Rotate src, then and selected range from rotated src into dst.
545
// Set condition code only if so requested. Otherwise it is unpredictable.
546
// See performance note in macroAssembler_s390.hpp for important information.
547
void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
548
int nRotate, bool test_only) {
549
guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
550
// This version does not depend on src being zero-extended int2long.
551
nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
552
z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then and selected.
553
}
554
555
// Rotate src, then or selected range from rotated src into dst.
556
// Set condition code only if so requested. Otherwise it is unpredictable.
557
// See performance note in macroAssembler_s390.hpp for important information.
558
void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
559
int nRotate, bool test_only) {
560
guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
561
// This version does not depend on src being zero-extended int2long.
562
nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
563
z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
564
}
565
566
// Rotate src, then xor selected range from rotated src into dst.
567
// Set condition code only if so requested. Otherwise it is unpredictable.
568
// See performance note in macroAssembler_s390.hpp for important information.
569
void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
570
int nRotate, bool test_only) {
571
guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
572
// This version does not depend on src being zero-extended int2long.
573
nRotate &= 0x003f; // For risbg, pretend it's an unsigned value.
574
z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
575
}
576
577
void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
578
if (inc.is_register()) {
579
z_agr(r1, inc.as_register());
580
} else { // constant
581
intptr_t imm = inc.as_constant();
582
add2reg(r1, imm);
583
}
584
}
585
// Helper function to multiply the 64bit contents of a register by a 16bit constant.
586
// The optimization tries to avoid the mghi instruction, since it uses the FPU for
587
// calculation and is thus rather slow.
588
//
589
// There is no handling for special cases, e.g. cval==0 or cval==1.
590
//
591
// Returns len of generated code block.
592
unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
593
int block_start = offset();
594
595
bool sign_flip = cval < 0;
596
cval = sign_flip ? -cval : cval;
597
598
BLOCK_COMMENT("Reg64*Con16 {");
599
600
int bit1 = cval & -cval;
601
if (bit1 == cval) {
602
z_sllg(rval, rval, exact_log2(bit1));
603
if (sign_flip) { z_lcgr(rval, rval); }
604
} else {
605
int bit2 = (cval-bit1) & -(cval-bit1);
606
if ((bit1+bit2) == cval) {
607
z_sllg(work, rval, exact_log2(bit1));
608
z_sllg(rval, rval, exact_log2(bit2));
609
z_agr(rval, work);
610
if (sign_flip) { z_lcgr(rval, rval); }
611
} else {
612
if (sign_flip) { z_mghi(rval, -cval); }
613
else { z_mghi(rval, cval); }
614
}
615
}
616
BLOCK_COMMENT("} Reg64*Con16");
617
618
int block_end = offset();
619
return block_end - block_start;
620
}
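// Worked example (illustrative only, not from the original source): for cval == 10
// the emitter avoids MGHI because 10 == 2 + 8 is the sum of two powers of two:
//   bit1 = 10 & -10 = 2,  bit2 = (10 - 2) & -(10 - 2) = 8
// so the generated sequence is
//   z_sllg(work, rval, 1);   // work = rval * 2
//   z_sllg(rval, rval, 3);   // rval = rval * 8
//   z_agr(rval, work);       // rval = rval * 10
// For cval == 7 (three bits set) the code falls back to z_mghi(rval, 7).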
621
622
// Generic operation r1 := r2 + imm.
623
//
624
// Should produce the best code for each supported CPU version.
625
// r2 == noreg yields r1 := r1 + imm
626
// imm == 0 emits either no instruction or r1 := r2 !
627
// NOTES: 1) Don't use this function where fixed sized
628
// instruction sequences are required!!!
629
// 2) Don't use this function if condition code
630
// setting is required!
631
// 3) Despite being declared as int64_t, the parameter imm
632
// must be a simm_32 value (= signed 32-bit integer).
633
void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
634
assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");
635
636
if (r2 == noreg) { r2 = r1; }
637
638
// Handle special case imm == 0.
639
if (imm == 0) {
640
lgr_if_needed(r1, r2);
641
// Nothing else to do.
642
return;
643
}
644
645
if (!PreferLAoverADD || (r2 == Z_R0)) {
646
bool distinctOpnds = VM_Version::has_DistinctOpnds();
647
648
// Can we encode imm in 16 bits signed?
649
if (Immediate::is_simm16(imm)) {
650
if (r1 == r2) {
651
z_aghi(r1, imm);
652
return;
653
}
654
if (distinctOpnds) {
655
z_aghik(r1, r2, imm);
656
return;
657
}
658
z_lgr(r1, r2);
659
z_aghi(r1, imm);
660
return;
661
}
662
} else {
663
// Can we encode imm in 12 bits unsigned?
664
if (Displacement::is_shortDisp(imm)) {
665
z_la(r1, imm, r2);
666
return;
667
}
668
// Can we encode imm in 20 bits signed?
669
if (Displacement::is_validDisp(imm)) {
670
// Always use LAY instruction, so we don't need the tmp register.
671
z_lay(r1, imm, r2);
672
return;
673
}
674
675
}
676
677
// Can handle it (all possible values) with long immediates.
678
lgr_if_needed(r1, r2);
679
z_agfi(r1, imm);
680
}
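// Usage sketch (illustrative only, not from the original source; registers arbitrary):
//   add2reg(Z_R3, 16);          // r3 += 16 (AGHI, or LA when PreferLAoverADD is set)
//   add2reg(Z_R3, 16, Z_R4);    // r3  = r4 + 16 (AGHIK / LGR+AGHI / LA, CPU dependent)
//   add2reg(Z_R3, 0,  Z_R4);    // r3  = r4 (plain register move, no add emitted)
// As noted above, neither code size nor the condition code is predictable, so do not
// use this emitter in fixed-length or CC-sensitive sequences.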
681
682
// Generic operation r := b + x + d
683
//
684
// Addition of several operands with address generation semantics - sort of:
685
// - no restriction on the registers. Any register will do for any operand.
686
// - x == noreg: operand will be disregarded.
687
// - b == noreg: will use (contents of) result reg as operand (r := r + d).
688
// - x == Z_R0: just disregard
689
// - b == Z_R0: use as operand. This is not address generation semantics!!!
690
//
691
// The same restrictions as on add2reg() are valid!!!
692
void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
693
assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");
694
695
if (x == noreg) { x = Z_R0; }
696
if (b == noreg) { b = r; }
697
698
// Handle special case x == R0.
699
if (x == Z_R0) {
700
// Can simply add the immediate value to the base register.
701
add2reg(r, d, b);
702
return;
703
}
704
705
if (!PreferLAoverADD || (b == Z_R0)) {
706
bool distinctOpnds = VM_Version::has_DistinctOpnds();
707
// Handle special case d == 0.
708
if (d == 0) {
709
if (b == x) { z_sllg(r, b, 1); return; }
710
if (r == x) { z_agr(r, b); return; }
711
if (r == b) { z_agr(r, x); return; }
712
if (distinctOpnds) { z_agrk(r, x, b); return; }
713
z_lgr(r, b);
714
z_agr(r, x);
715
} else {
716
if (x == b) { z_sllg(r, x, 1); }
717
else if (r == x) { z_agr(r, b); }
718
else if (r == b) { z_agr(r, x); }
719
else if (distinctOpnds) { z_agrk(r, x, b); }
720
else {
721
z_lgr(r, b);
722
z_agr(r, x);
723
}
724
add2reg(r, d);
725
}
726
} else {
727
// Can we encode imm in 12 bits unsigned?
728
if (Displacement::is_shortDisp(d)) {
729
z_la(r, d, x, b);
730
return;
731
}
732
// Can we encode imm in 20 bits signed?
733
if (Displacement::is_validDisp(d)) {
734
z_lay(r, d, x, b);
735
return;
736
}
737
z_la(r, 0, x, b);
738
add2reg(r, d);
739
}
740
}
741
742
// Generic emitter (32bit) for direct memory increment.
743
// For optimal code, do not specify Z_R0 as temp register.
744
void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
745
if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
746
z_asi(a, imm);
747
} else {
748
z_lgf(tmp, a);
749
add2reg(tmp, imm);
750
z_st(tmp, a);
751
}
752
}
753
754
void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
755
if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
756
z_agsi(a, imm);
757
} else {
758
z_lg(tmp, a);
759
add2reg(tmp, imm);
760
z_stg(tmp, a);
761
}
762
}
763
764
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
765
switch (size_in_bytes) {
766
case 8: z_lg(dst, src); break;
767
case 4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
768
case 2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
769
case 1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
770
default: ShouldNotReachHere();
771
}
772
}
773
774
void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
775
switch (size_in_bytes) {
776
case 8: z_stg(src, dst); break;
777
case 4: z_st(src, dst); break;
778
case 2: z_sth(src, dst); break;
779
case 1: z_stc(src, dst); break;
780
default: ShouldNotReachHere();
781
}
782
}
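// Example (illustrative only; 'offs' is a placeholder offset): load a Java short
// sign-extended into a 64-bit register, then store the low 2 bytes back:
//   load_sized_value (Z_R2, Address(Z_R3, offs), 2, true /*is_signed*/);   // z_lgh
//   store_sized_value(Z_R2, Address(Z_R3, offs), 2);                       // z_sth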
783
784
// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
785
// a high-order summand in register tmp.
786
//
787
// return value: < 0: No split required, si20 actually has property uimm12.
788
// >= 0: Split performed. Use return value as uimm12 displacement and
789
// tmp as index register.
790
int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
791
assert(Immediate::is_simm20(si20_offset), "sanity");
792
int lg_off = (int)si20_offset & 0x0fff; // Punch out low-order 12 bits, always positive.
793
int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
794
assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
795
!Displacement::is_shortDisp(si20_offset), "unexpected offset values");
796
assert((lg_off+ll_off) == si20_offset, "offset splitup error");
797
798
Register work = accumulate? Z_R0 : tmp;
799
800
if (fixed_codelen) { // Len of code = 10 = 4 + 6.
801
z_lghi(work, ll_off>>12); // Implicit sign extension.
802
z_slag(work, work, 12);
803
} else { // Len of code = 0..10.
804
if (ll_off == 0) { return -1; }
805
// ll_off has 8 significant bits (at most) plus sign.
806
if ((ll_off & 0x0000f000) == 0) { // Non-zero bits only in upper halfbyte.
807
z_llilh(work, ll_off >> 16);
808
if (ll_off < 0) { // Sign-extension required.
809
z_lgfr(work, work);
810
}
811
} else {
812
if ((ll_off & 0x000f0000) == 0) { // Non-zero bits only in lower halfbyte.
813
z_llill(work, ll_off);
814
} else { // Non-zero bits in both halfbytes.
815
z_lghi(work, ll_off>>12); // Implicit sign extension.
816
z_slag(work, work, 12);
817
}
818
}
819
}
820
if (accumulate) { z_algr(tmp, work); } // len of code += 4
821
return lg_off;
822
}
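// Worked example (illustrative only, not from the original source): for
// si20_offset == 0x12345 the split yields
//   lg_off = 0x345    (low-order 12 bits, returned as the uimm12 displacement)
//   ll_off = 0x12000  (high-order summand, materialized in the work register)
// With accumulate == true, the high-order part is added into tmp (which callers such
// as load_float_largeoffset below preload with the base address), and the memory
// access then uses displacement 0x345 with tmp as base.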
823
824
void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
825
if (Displacement::is_validDisp(si20)) {
826
z_ley(t, si20, a);
827
} else {
828
// Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
829
// does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
830
// pool loads).
831
bool accumulate = true;
832
bool fixed_codelen = true;
833
Register work;
834
835
if (fixed_codelen) {
836
z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
837
} else {
838
accumulate = (a == tmp);
839
}
840
work = tmp;
841
842
int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
843
if (disp12 < 0) {
844
z_le(t, si20, work);
845
} else {
846
if (accumulate) {
847
z_le(t, disp12, work);
848
} else {
849
z_le(t, disp12, work, a);
850
}
851
}
852
}
853
}
854
855
void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
856
if (Displacement::is_validDisp(si20)) {
857
z_ldy(t, si20, a);
858
} else {
859
// Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
860
// does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
861
// pool loads).
862
bool accumulate = true;
863
bool fixed_codelen = true;
864
Register work;
865
866
if (fixed_codelen) {
867
z_lgr(tmp, a); // Lgr_if_needed not applicable due to fixed_codelen.
868
} else {
869
accumulate = (a == tmp);
870
}
871
work = tmp;
872
873
int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
874
if (disp12 < 0) {
875
z_ld(t, si20, work);
876
} else {
877
if (accumulate) {
878
z_ld(t, disp12, work);
879
} else {
880
z_ld(t, disp12, work, a);
881
}
882
}
883
}
884
}
885
886
// PCrelative TOC access.
887
// Returns distance (in bytes) from current position to start of consts section.
888
// Returns 0 (zero) if no consts section exists or if it has size zero.
889
long MacroAssembler::toc_distance() {
890
CodeSection* cs = code()->consts();
891
return (long)((cs != NULL) ? cs->start()-pc() : 0);
892
}
893
894
// Implementation on x86/sparc assumes that constant and instruction section are
895
// adjacent, but this doesn't hold here. Two special situations may occur that we must
896
// be able to handle:
897
// 1. const section may be located apart from the inst section.
898
// 2. const section may be empty
899
// In both cases, we use the const section's start address to compute the "TOC",
900
// this seems to occur only temporarily; in the final step we always seem to end up
901
// with the pc-relative variant.
902
//
903
// PC-relative offset could be +/-2**32 -> use long for disp
904
// Furthermore: makes no sense to have special code for
905
// adjacent const and inst sections.
906
void MacroAssembler::load_toc(Register Rtoc) {
907
// Simply use distance from start of const section (should be patched in the end).
908
long disp = toc_distance();
909
910
RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp);
911
relocate(rspec);
912
z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords.
913
}
914
915
// PCrelative TOC access.
916
// Load from anywhere pcrelative (with relocation of load instr)
917
void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) {
918
address pc = this->pc();
919
ptrdiff_t total_distance = dataLocation - pc;
920
RelocationHolder rspec = internal_word_Relocation::spec(dataLocation);
921
922
assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
923
assert(total_distance != 0, "sanity");
924
925
// Some extra safety net.
926
if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
927
guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_long_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
928
}
929
930
(this)->relocate(rspec, relocInfo::pcrel_addr_format);
931
z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
932
}
933
934
935
// PCrelative TOC access.
936
// Load from anywhere pcrelative (with relocation of load instr)
937
// loaded addr has to be relocated when added to constant pool.
938
void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) {
939
address pc = this->pc();
940
ptrdiff_t total_distance = addrLocation - pc;
941
RelocationHolder rspec = internal_word_Relocation::spec(addrLocation);
942
943
assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory");
944
945
// Some extra safety net.
946
if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) {
947
guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "load_addr_pcrelative can't handle distance " INTPTR_FORMAT, total_distance);
948
}
949
950
(this)->relocate(rspec, relocInfo::pcrel_addr_format);
951
z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance));
952
}
953
954
// Generic operation: load a value from memory and test.
955
// CondCode indicates the sign (<0, ==0, >0) of the loaded value.
956
void MacroAssembler::load_and_test_byte(Register dst, const Address &a) {
957
z_lb(dst, a);
958
z_ltr(dst, dst);
959
}
960
961
void MacroAssembler::load_and_test_short(Register dst, const Address &a) {
962
int64_t disp = a.disp20();
963
if (Displacement::is_shortDisp(disp)) {
964
z_lh(dst, a);
965
} else if (Displacement::is_longDisp(disp)) {
966
z_lhy(dst, a);
967
} else {
968
guarantee(false, "displacement out of range");
969
}
970
z_ltr(dst, dst);
971
}
972
973
void MacroAssembler::load_and_test_int(Register dst, const Address &a) {
974
z_lt(dst, a);
975
}
976
977
void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) {
978
z_ltgf(dst, a);
979
}
980
981
void MacroAssembler::load_and_test_long(Register dst, const Address &a) {
982
z_ltg(dst, a);
983
}
984
985
// Test a bit in memory.
986
void MacroAssembler::testbit(const Address &a, unsigned int bit) {
987
assert(a.index() == noreg, "no index reg allowed in testbit");
988
if (bit <= 7) {
989
z_tm(a.disp() + 3, a.base(), 1 << bit);
990
} else if (bit <= 15) {
991
z_tm(a.disp() + 2, a.base(), 1 << (bit - 8));
992
} else if (bit <= 23) {
993
z_tm(a.disp() + 1, a.base(), 1 << (bit - 16));
994
} else if (bit <= 31) {
995
z_tm(a.disp() + 0, a.base(), 1 << (bit - 24));
996
} else {
997
ShouldNotReachHere();
998
}
999
}
1000
1001
// Test a bit in a register. Result is reflected in CC.
1002
void MacroAssembler::testbit(Register r, unsigned int bitPos) {
1003
if (bitPos < 16) {
1004
z_tmll(r, 1U<<bitPos);
1005
} else if (bitPos < 32) {
1006
z_tmlh(r, 1U<<(bitPos-16));
1007
} else if (bitPos < 48) {
1008
z_tmhl(r, 1U<<(bitPos-32));
1009
} else if (bitPos < 64) {
1010
z_tmhh(r, 1U<<(bitPos-48));
1011
} else {
1012
ShouldNotReachHere();
1013
}
1014
}
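// Example (illustrative only; 'skip' is a hypothetical label and the bcondAllZero
// mnemonic is assumed from the s390 assembler headers): test bit 35, counting from
// the least significant bit, and branch if it is clear:
//   testbit(Z_R2, 35);              // emits z_tmhl(Z_R2, 1 << 3)
//   z_brc(bcondAllZero, skip);      // CC == 0 means the tested bit was 0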
1015
1016
void MacroAssembler::prefetch_read(Address a) {
1017
z_pfd(1, a.disp20(), a.indexOrR0(), a.base());
1018
}
1019
void MacroAssembler::prefetch_update(Address a) {
1020
z_pfd(2, a.disp20(), a.indexOrR0(), a.base());
1021
}
1022
1023
// Clear a register, i.e. load const zero into reg.
1024
// Return len (in bytes) of generated instruction(s).
1025
// whole_reg: Clear 64 bits if true, 32 bits otherwise.
1026
// set_cc: Use instruction that sets the condition code, if true.
1027
int MacroAssembler::clear_reg(Register r, bool whole_reg, bool set_cc) {
1028
unsigned int start_off = offset();
1029
if (whole_reg) {
1030
set_cc ? z_xgr(r, r) : z_laz(r, 0, Z_R0);
1031
} else { // Only 32bit register.
1032
set_cc ? z_xr(r, r) : z_lhi(r, 0);
1033
}
1034
return offset() - start_off;
1035
}
1036
1037
#ifdef ASSERT
1038
int MacroAssembler::preset_reg(Register r, unsigned long pattern, int pattern_len) {
1039
switch (pattern_len) {
1040
case 1:
1041
pattern = (pattern & 0x000000ff) | ((pattern & 0x000000ff)<<8);
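// Fall through: pattern is now 2 bytes wide.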
1042
case 2:
1043
pattern = (pattern & 0x0000ffff) | ((pattern & 0x0000ffff)<<16);
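// Fall through: pattern is now 4 bytes wide.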
1044
case 4:
1045
pattern = (pattern & 0xffffffffL) | ((pattern & 0xffffffffL)<<32);
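// Fall through: pattern is now 8 bytes wide.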
1046
case 8:
1047
return load_const_optimized_rtn_len(r, pattern, true);
1048
break;
1049
default:
1050
guarantee(false, "preset_reg: bad len");
1051
}
1052
return 0;
1053
}
1054
#endif
1055
1056
// addr: Address descriptor of memory to clear. The index register will not be used!
1057
// size: Number of bytes to clear.
1058
// !!! DO NOT USE THEM FOR ATOMIC MEMORY CLEARING !!!
1059
// !!! Use store_const() instead !!!
1060
void MacroAssembler::clear_mem(const Address& addr, unsigned size) {
1061
guarantee(size <= 256, "MacroAssembler::clear_mem: size too large");
1062
1063
if (size == 1) {
1064
z_mvi(addr, 0);
1065
return;
1066
}
1067
1068
switch (size) {
1069
case 2: z_mvhhi(addr, 0);
1070
return;
1071
case 4: z_mvhi(addr, 0);
1072
return;
1073
case 8: z_mvghi(addr, 0);
1074
return;
1075
default: ; // Fallthru to xc.
1076
}
1077
1078
z_xc(addr, size, addr);
1079
}
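// Usage sketch (illustrative only, not from the original source):
//   clear_mem(Address(Z_R2, 8), 8);    // single z_mvghi: zero 8 bytes
//   clear_mem(Address(Z_R2, 8), 40);   // falls through to z_xc(addr, 40, addr)
// Neither form is atomic; as the warning above says, use store_const() if other
// threads may observe the location concurrently.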
1080
1081
void MacroAssembler::align(int modulus) {
1082
while (offset() % modulus != 0) z_nop();
1083
}
1084
1085
// Special version for non-relocatable code if required alignment
1086
// is larger than CodeEntryAlignment.
1087
void MacroAssembler::align_address(int modulus) {
1088
while ((uintptr_t)pc() % modulus != 0) z_nop();
1089
}
1090
1091
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
1092
Register temp_reg,
1093
int64_t extra_slot_offset) {
1094
// On Z, we can have index and disp in an Address. So don't call argument_offset,
1095
// which issues an unnecessary add instruction.
1096
int stackElementSize = Interpreter::stackElementSize;
1097
int64_t offset = extra_slot_offset * stackElementSize;
1098
const Register argbase = Z_esp;
1099
if (arg_slot.is_constant()) {
1100
offset += arg_slot.as_constant() * stackElementSize;
1101
return Address(argbase, offset);
1102
}
1103
// else
1104
assert(temp_reg != noreg, "must specify");
1105
assert(temp_reg != Z_ARG1, "base and index are conflicting");
1106
z_sllg(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); // tempreg = arg_slot << 3
1107
return Address(argbase, temp_reg, offset);
1108
}
1109
1110
1111
//===================================================================
1112
//=== START   C O N S T A N T S   I N   C O D E   S T R E A M   ===
1113
//===================================================================
1114
//===   P A T C H A B L E   C O N S T A N T S   ===
1115
//===================================================================
1116
1117
1118
//---------------------------------------------------
1119
// Load (patchable) constant into register
1120
//---------------------------------------------------
1121
1122
1123
// Load absolute address (and try to optimize).
1124
// Note: This method is usable only for position-fixed code,
1125
// referring to a position-fixed target location.
1126
// If not so, relocations and patching must be used.
1127
void MacroAssembler::load_absolute_address(Register d, address addr) {
1128
assert(addr != NULL, "should not happen");
1129
BLOCK_COMMENT("load_absolute_address:");
1130
if (addr == NULL) {
1131
z_larl(d, pc()); // Dummy emit for size calc.
1132
return;
1133
}
1134
1135
if (RelAddr::is_in_range_of_RelAddr32(addr, pc())) {
1136
z_larl(d, addr);
1137
return;
1138
}
1139
1140
load_const_optimized(d, (long)addr);
1141
}
1142
1143
// Load a 64bit constant.
1144
// Patchable code sequence, but not atomically patchable.
1145
// Make sure to keep code size constant -> no value-dependent optimizations.
1146
// Do not kill condition code.
1147
void MacroAssembler::load_const(Register t, long x) {
1148
// Note: Right shift is only cleanly defined for unsigned types
1149
// or for signed types with nonnegative values.
1150
Assembler::z_iihf(t, (long)((unsigned long)x >> 32));
1151
Assembler::z_iilf(t, (long)((unsigned long)x & 0xffffffffUL));
1152
}
1153
1154
// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
1155
// Patchable code sequence, but not atomically patchable.
1156
// Make sure to keep code size constant -> no value-dependent optimizations.
1157
// Do not kill condition code.
1158
void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
1159
if (sign_extend) { Assembler::z_lgfi(t, x); }
1160
else { Assembler::z_llilf(t, x); }
1161
}
1162
1163
// Load narrow oop constant, no decompression.
1164
void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
1165
assert(UseCompressedOops, "must be on to call this method");
1166
load_const_32to64(t, CompressedOops::narrow_oop_value(a), false /*sign_extend*/);
1167
}
1168
1169
// Load narrow klass constant, compression required.
1170
void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
1171
assert(UseCompressedClassPointers, "must be on to call this method");
1172
narrowKlass encoded_k = CompressedKlassPointers::encode(k);
1173
load_const_32to64(t, encoded_k, false /*sign_extend*/);
1174
}
1175
1176
//------------------------------------------------------
1177
// Compare (patchable) constant with register.
1178
//------------------------------------------------------
1179
1180
// Compare narrow oop in reg with narrow oop constant, no decompression.
1181
void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
1182
assert(UseCompressedOops, "must be on to call this method");
1183
1184
Assembler::z_clfi(oop1, CompressedOops::narrow_oop_value(oop2));
1185
}
1186
1187
// Compare narrow oop in reg with narrow oop constant, no decompression.
1188
void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
1189
assert(UseCompressedClassPointers, "must be on to call this method");
1190
narrowKlass encoded_k = CompressedKlassPointers::encode(klass2);
1191
1192
Assembler::z_clfi(klass1, encoded_k);
1193
}
1194
1195
//----------------------------------------------------------
1196
// Check which kind of load_constant we have here.
1197
//----------------------------------------------------------
1198
1199
// Detection of CPU version dependent load_const sequence.
1200
// The detection is valid only for code sequences generated by load_const,
1201
// not load_const_optimized.
1202
bool MacroAssembler::is_load_const(address a) {
1203
unsigned long inst1, inst2;
1204
unsigned int len1, len2;
1205
1206
len1 = get_instruction(a, &inst1);
1207
len2 = get_instruction(a + len1, &inst2);
1208
1209
return is_z_iihf(inst1) && is_z_iilf(inst2);
1210
}
1211
1212
// Detection of CPU version dependent load_const_32to64 sequence.
1213
// Mostly used for narrow oops and narrow Klass pointers.
1214
// The detection is valid only for code sequences generated by load_const_32to64.
1215
bool MacroAssembler::is_load_const_32to64(address pos) {
1216
unsigned long inst1, inst2;
1217
unsigned int len1;
1218
1219
len1 = get_instruction(pos, &inst1);
1220
return is_z_llilf(inst1);
1221
}
1222
1223
// Detection of compare_immediate_narrow sequence.
1224
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
1225
bool MacroAssembler::is_compare_immediate32(address pos) {
1226
return is_equal(pos, CLFI_ZOPC, RIL_MASK);
1227
}
1228
1229
// Detection of compare_immediate_narrow sequence.
1230
// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
1231
bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
1232
return is_compare_immediate32(pos);
1233
}
1234
1235
// Detection of compare_immediate_narrow sequence.
1236
// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
1237
bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
1238
return is_compare_immediate32(pos);
1239
}
1240
1241
//-----------------------------------
1242
// patch the load_constant
1243
//-----------------------------------
1244
1245
// CPU-version dependent patching of load_const.
1246
void MacroAssembler::patch_const(address a, long x) {
1247
assert(is_load_const(a), "not a load of a constant");
1248
// Note: Right shift is only cleanly defined for unsigned types
1249
// or for signed types with nonnegative values.
1250
set_imm32((address)a, (long)((unsigned long)x >> 32));
1251
set_imm32((address)(a + 6), (long)((unsigned long)x & 0xffffffffUL));
1252
}
1253
1254
// Patching the value of CPU version dependent load_const_32to64 sequence.
1255
// The passed ptr MUST be in compressed format!
1256
int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
1257
assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");
1258
1259
set_imm32(pos, np);
1260
return 6;
1261
}
1262
1263
// Patching the value of CPU version dependent compare_immediate_narrow sequence.
1264
// The passed ptr MUST be in compressed format!
1265
int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
1266
assert(is_compare_immediate32(pos), "not a compressed ptr compare");
1267
1268
set_imm32(pos, np);
1269
return 6;
1270
}
1271
1272
// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
1273
// The passed ptr must NOT be in compressed format!
1274
int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
1275
assert(UseCompressedOops, "Can only patch compressed oops");
1276
return patch_load_const_32to64(pos, CompressedOops::narrow_oop_value(o));
1277
}
1278
1279
// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
1280
// The passed ptr must NOT be in compressed format!
1281
int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
1282
assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
1283
1284
narrowKlass nk = CompressedKlassPointers::encode(k);
1285
return patch_load_const_32to64(pos, nk);
1286
}
1287
1288
// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
1289
// The passed ptr must NOT be in compressed format!
1290
int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
1291
assert(UseCompressedOops, "Can only patch compressed oops");
1292
return patch_compare_immediate_32(pos, CompressedOops::narrow_oop_value(o));
1293
}
1294
1295
// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
1296
// The passed ptr must NOT be in compressed format!
1297
int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
1298
assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
1299
1300
narrowKlass nk = CompressedKlassPointers::encode(k);
1301
return patch_compare_immediate_32(pos, nk);
1302
}
1303
1304
//------------------------------------------------------------------------
1305
// Extract the constant from a load_constant instruction stream.
1306
//------------------------------------------------------------------------
1307
1308
// Get constant from a load_const sequence.
1309
long MacroAssembler::get_const(address a) {
1310
assert(is_load_const(a), "not a load of a constant");
1311
unsigned long x;
1312
x = (((unsigned long) (get_imm32(a,0) & 0xffffffff)) << 32);
1313
x |= (((unsigned long) (get_imm32(a,1) & 0xffffffff)));
1314
return (long) x;
1315
}
1316
1317
//--------------------------------------
1318
// Store a constant in memory.
1319
//--------------------------------------
1320
1321
// General emitter to move a constant to memory.
1322
// The store is atomic.
1323
// o Address must be given in RS format (no index register)
1324
// o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
1325
// o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
1326
// o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
1327
// o Memory slot must be at least as wide as constant, will assert otherwise.
1328
// o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
1329
int MacroAssembler::store_const(const Address &dest, long imm,
1330
unsigned int lm, unsigned int lc,
1331
Register scratch) {
1332
int64_t disp = dest.disp();
1333
Register base = dest.base();
1334
assert(!dest.has_index(), "not supported");
1335
assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported");
1336
assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported");
1337
assert(lm>=lc, "memory slot too small");
1338
assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range");
1339
assert(Displacement::is_validDisp(disp), "displacement out of range");
1340
1341
bool is_shortDisp = Displacement::is_shortDisp(disp);
1342
int store_offset = -1;
1343
1344
// For target len == 1 it's easy.
1345
if (lm == 1) {
1346
store_offset = offset();
1347
if (is_shortDisp) {
1348
z_mvi(disp, base, imm);
1349
return store_offset;
1350
} else {
1351
z_mviy(disp, base, imm);
1352
return store_offset;
1353
}
1354
}
1355
1356
// All the "good stuff" takes an unsigned displacement.
1357
if (is_shortDisp) {
1358
// NOTE: Cannot use clear_mem for imm==0, because it is not atomic.
1359
1360
store_offset = offset();
1361
switch (lm) {
1362
case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening.
1363
z_mvhhi(disp, base, imm);
1364
return store_offset;
1365
case 4:
1366
if (Immediate::is_simm16(imm)) {
1367
z_mvhi(disp, base, imm);
1368
return store_offset;
1369
}
1370
break;
1371
case 8:
1372
if (Immediate::is_simm16(imm)) {
1373
z_mvghi(disp, base, imm);
1374
return store_offset;
1375
}
1376
break;
1377
default:
1378
ShouldNotReachHere();
1379
break;
1380
}
1381
}
1382
1383
// Can't optimize, so load value and store it.
1384
guarantee(scratch != noreg, " need a scratch register here !");
1385
if (imm != 0) {
1386
load_const_optimized(scratch, imm); // Preserves CC anyway.
1387
} else {
1388
// Leave CC alone!!
1389
(void) clear_reg(scratch, true, false); // Indicate unused result.
1390
}
1391
1392
store_offset = offset();
1393
if (is_shortDisp) {
1394
switch (lm) {
1395
case 2:
1396
z_sth(scratch, disp, Z_R0, base);
1397
return store_offset;
1398
case 4:
1399
z_st(scratch, disp, Z_R0, base);
1400
return store_offset;
1401
case 8:
1402
z_stg(scratch, disp, Z_R0, base);
1403
return store_offset;
1404
default:
1405
ShouldNotReachHere();
1406
break;
1407
}
1408
} else {
1409
switch (lm) {
1410
case 2:
1411
z_sthy(scratch, disp, Z_R0, base);
1412
return store_offset;
1413
case 4:
1414
z_sty(scratch, disp, Z_R0, base);
1415
return store_offset;
1416
case 8:
1417
z_stg(scratch, disp, Z_R0, base);
1418
return store_offset;
1419
default:
1420
ShouldNotReachHere();
1421
break;
1422
}
1423
}
1424
return -1; // should not reach here
1425
}
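// Example (illustrative only, not from the original source): zero an 8-byte slot at
// offset 24 without disturbing the condition code; the scratch register is needed
// only if the fallback (load + store) path is taken:
//   store_const(Address(Z_R2, 24), 0, 8, 8, Z_R1);   // emits a single z_mvghi
// A constant that does not fit the 16-bit MVGHI immediate (e.g. 0x12345678) is first
// materialized in the scratch register and then stored with z_stg.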
1426
1427
//===================================================================
1428
//===   N O T   P A T C H A B L E   C O N S T A N T S   ===
1429
//===================================================================
1430
1431
// Load constant x into register t with a fast instruction sequence
1432
// depending on the bits in x. Preserves CC under all circumstances.
1433
int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) {
1434
if (x == 0) {
1435
int len;
1436
if (emit) {
1437
len = clear_reg(t, true, false);
1438
} else {
1439
len = 4;
1440
}
1441
return len;
1442
}
1443
1444
if (Immediate::is_simm16(x)) {
1445
if (emit) { z_lghi(t, x); }
1446
return 4;
1447
}
1448
1449
// 64 bit value: | part1 | part2 | part3 | part4 |
1450
// At least one part is not zero!
1451
// Note: Right shift is only cleanly defined for unsigned types
1452
// or for signed types with nonnegative values.
1453
int part1 = (int)((unsigned long)x >> 48) & 0x0000ffff;
1454
int part2 = (int)((unsigned long)x >> 32) & 0x0000ffff;
1455
int part3 = (int)((unsigned long)x >> 16) & 0x0000ffff;
1456
int part4 = (int)x & 0x0000ffff;
1457
int part12 = (int)((unsigned long)x >> 32);
1458
int part34 = (int)x;
1459
1460
// Lower word only (unsigned).
1461
if (part12 == 0) {
1462
if (part3 == 0) {
1463
if (emit) z_llill(t, part4);
1464
return 4;
1465
}
1466
if (part4 == 0) {
1467
if (emit) z_llilh(t, part3);
1468
return 4;
1469
}
1470
if (emit) z_llilf(t, part34);
1471
return 6;
1472
}
1473
1474
// Upper word only.
1475
if (part34 == 0) {
1476
if (part1 == 0) {
1477
if (emit) z_llihl(t, part2);
1478
return 4;
1479
}
1480
if (part2 == 0) {
1481
if (emit) z_llihh(t, part1);
1482
return 4;
1483
}
1484
if (emit) z_llihf(t, part12);
1485
return 6;
1486
}
1487
1488
// Lower word only (signed).
1489
if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) {
1490
if (emit) z_lgfi(t, part34);
1491
return 6;
1492
}
1493
1494
int len = 0;
1495
1496
if ((part1 == 0) || (part2 == 0)) {
1497
if (part1 == 0) {
1498
if (emit) z_llihl(t, part2);
1499
len += 4;
1500
} else {
1501
if (emit) z_llihh(t, part1);
1502
len += 4;
1503
}
1504
} else {
1505
if (emit) z_llihf(t, part12);
1506
len += 6;
1507
}
1508
1509
if ((part3 == 0) || (part4 == 0)) {
1510
if (part3 == 0) {
1511
if (emit) z_iill(t, part4);
1512
len += 4;
1513
} else {
1514
if (emit) z_iilh(t, part3);
1515
len += 4;
1516
}
1517
} else {
1518
if (emit) z_iilf(t, part34);
1519
len += 6;
1520
}
1521
return len;
1522
}
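// Worked examples (illustrative only, derived from the cases above); the return
// value is the emitted code size in bytes:
//   x == 0x000000000000ABCDL -> z_llill(t, 0xABCD)       (4)
//   x == 0x0000ABCD00000000L -> z_llihl(t, 0xABCD)       (4)
//   x == 0x00000000FFFF1234L -> z_llilf(t, 0xFFFF1234)   (6)
//   x == 0x1234567800000000L -> z_llihf(t, 0x12345678)   (6)
//   x == -1                  -> z_lghi(t, -1)             (4)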
1523
1524
//=====================================================================
1525
//===   H I G H E R   L E V E L   B R A N C H   E M I T T E R S   ===
1526
//=====================================================================
1527
1528
// Note: In the worst case, one of the scratch registers is destroyed!!!
1529
void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1530
// Right operand is constant.
1531
if (x2.is_constant()) {
1532
jlong value = x2.as_constant();
1533
compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true);
1534
return;
1535
}
1536
1537
// Right operand is in register.
1538
compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true);
1539
}
1540
1541
// Note: In the worst case, one of the scratch registers is destroyed!!!
1542
void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1543
// Right operand is constant.
1544
if (x2.is_constant()) {
1545
jlong value = x2.as_constant();
1546
compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false);
1547
return;
1548
}
1549
1550
// Right operand is in register.
1551
compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false);
1552
}
1553
1554
// Note: In the worst case, one of the scratch registers is destroyed!!!
1555
void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1556
// Right operand is constant.
1557
if (x2.is_constant()) {
1558
jlong value = x2.as_constant();
1559
compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true);
1560
return;
1561
}
1562
1563
// Right operand is in register.
1564
compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true);
1565
}
1566
1567
void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) {
1568
// Right operand is constant.
1569
if (x2.is_constant()) {
1570
jlong value = x2.as_constant();
1571
compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false);
1572
return;
1573
}
1574
1575
// Right operand is in register.
1576
compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false);
1577
}
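// The four wrappers above differ only in operand width and signedness:
//   compare32_and_branch   -> len64=false, has_sign=true
//   compareU32_and_branch  -> len64=false, has_sign=false
//   compare64_and_branch   -> len64=true,  has_sign=true
//   compareU64_and_branch  -> len64=true,  has_sign=false
// Usage sketch (illustrative only; 'done' is a hypothetical label):
//   compare64_and_branch(Z_ARG1, Z_ARG2, Assembler::bcondEqual, done);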
1578
1579
// Generate an optimal branch to the branch target.
1580
// Optimal means that a relative branch (brc or brcl) is used if the
1581
// branch distance is short enough. Loading the target address into a
1582
// register and branching via reg is used as fallback only.
1583
//
1584
// Used registers:
1585
// Z_R1 - work reg. Holds branch target address.
1586
// Used in fallback case only.
1587
//
1588
// This version of branch_optimized is good for cases where the target address is known
1589
// and constant, i.e. is never changed (no relocation, no patching).
1590
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) {
1591
address branch_origin = pc();
1592
1593
if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
1594
z_brc(cond, branch_addr);
1595
} else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) {
1596
z_brcl(cond, branch_addr);
1597
} else {
1598
load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized.
1599
z_bcr(cond, Z_R1);
1600
}
1601
}
1602
1603
// This version of branch_optimized is good for cases where the target address
1604
// is potentially not yet known at the time the code is emitted.
1605
//
1606
// One very common case is a branch to an unbound label which is handled here.
1607
// The caller might know (or hope) that the branch distance is short enough
1608
// to be encoded in a 16-bit relative address. In this case, the caller will pass a
1609
// NearLabel branch_target.
1610
// Care must be taken with unbound labels. Each call to target(label) creates
1611
// an entry in the patch queue for that label to patch all references of the label
1612
// once it gets bound. Those recorded patch locations must be patchable. Otherwise,
1613
// an assertion fires at patch time.
1614
void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) {
1615
if (branch_target.is_bound()) {
1616
address branch_addr = target(branch_target);
1617
branch_optimized(cond, branch_addr);
1618
} else if (branch_target.is_near()) {
1619
z_brc(cond, branch_target); // Caller assures that the target will be in range for z_brc.
1620
} else {
1621
z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time.
1622
}
1623
}
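// Encoding sizes chosen by branch_optimized (for reference):
//   z_brc  - 4 bytes, 16-bit pc-relative target (+/- 64KB)
//   z_brcl - 6 bytes, 32-bit pc-relative target (+/- 4GB)
//   load_const_optimized + z_bcr - fallback for targets out of brcl range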
1624
1625
// Generate an optimal compare and branch to the branch target.
1626
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1627
// branch distance is short enough. Loading the target address into a
1628
// register and branching via reg is used as fallback only.
1629
//
1630
// Input:
1631
// r1 - left compare operand
1632
// r2 - right compare operand
1633
void MacroAssembler::compare_and_branch_optimized(Register r1,
1634
Register r2,
1635
Assembler::branch_condition cond,
1636
address branch_addr,
1637
bool len64,
1638
bool has_sign) {
1639
unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);
1640
1641
address branch_origin = pc();
1642
if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) {
1643
switch (casenum) {
1644
case 0: z_crj( r1, r2, cond, branch_addr); break;
1645
case 1: z_clrj (r1, r2, cond, branch_addr); break;
1646
case 2: z_cgrj(r1, r2, cond, branch_addr); break;
1647
case 3: z_clgrj(r1, r2, cond, branch_addr); break;
1648
default: ShouldNotReachHere(); break;
1649
}
1650
} else {
1651
switch (casenum) {
1652
case 0: z_cr( r1, r2); break;
1653
case 1: z_clr(r1, r2); break;
1654
case 2: z_cgr(r1, r2); break;
1655
case 3: z_clgr(r1, r2); break;
1656
default: ShouldNotReachHere(); break;
1657
}
1658
branch_optimized(cond, branch_addr);
1659
}
1660
}
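// The casenum encoding used by all compare_and_branch_optimized variants:
//   casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1)
//   0 = 32-bit signed, 1 = 32-bit unsigned, 2 = 64-bit signed, 3 = 64-bit unsigned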
1661
1662
// Generate an optimal compare and branch to the branch target.
1663
// Optimal means that a relative branch (clgij, brc or brcl) is used if the
1664
// branch distance is short enough. Loading the target address into a
1665
// register and branching via reg is used as fallback only.
1666
//
1667
// Input:
1668
// r1 - left compare operand (in register)
1669
// x2 - right compare operand (immediate)
1670
void MacroAssembler::compare_and_branch_optimized(Register r1,
1671
jlong x2,
1672
Assembler::branch_condition cond,
1673
Label& branch_target,
1674
bool len64,
1675
bool has_sign) {
1676
address branch_origin = pc();
1677
bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2));
1678
bool is_RelAddr16 = branch_target.is_near() ||
1679
(branch_target.is_bound() &&
1680
RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin));
1681
unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);
1682
1683
if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) {
1684
switch (casenum) {
1685
case 0: z_cij( r1, x2, cond, branch_target); break;
1686
case 1: z_clij(r1, x2, cond, branch_target); break;
1687
case 2: z_cgij(r1, x2, cond, branch_target); break;
1688
case 3: z_clgij(r1, x2, cond, branch_target); break;
1689
default: ShouldNotReachHere(); break;
1690
}
1691
return;
1692
}
1693
1694
if (x2 == 0) {
1695
switch (casenum) {
1696
case 0: z_ltr(r1, r1); break;
1697
case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1698
case 2: z_ltgr(r1, r1); break;
1699
case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication!
1700
default: ShouldNotReachHere(); break;
1701
}
1702
} else {
1703
if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) {
1704
switch (casenum) {
1705
case 0: z_chi(r1, x2); break;
1706
case 1: z_chi(r1, x2); break; // positive immediate < 2**15
1707
case 2: z_cghi(r1, x2); break;
1708
case 3: z_cghi(r1, x2); break; // positive immediate < 2**15
1709
default: break;
1710
}
1711
} else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) {
1712
switch (casenum) {
1713
case 0: z_cfi( r1, x2); break;
1714
case 1: z_clfi(r1, x2); break;
1715
case 2: z_cgfi(r1, x2); break;
1716
case 3: z_clgfi(r1, x2); break;
1717
default: ShouldNotReachHere(); break;
1718
}
1719
} else {
1720
// No instruction with immediate operand possible, so load into register.
1721
Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1;
1722
load_const_optimized(scratch, x2);
1723
switch (casenum) {
1724
case 0: z_cr( r1, scratch); break;
1725
case 1: z_clr(r1, scratch); break;
1726
case 2: z_cgr(r1, scratch); break;
1727
case 3: z_clgr(r1, scratch); break;
1728
default: ShouldNotReachHere(); break;
1729
}
1730
}
1731
}
1732
branch_optimized(cond, branch_target);
1733
}
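// Immediate handling above, by example (illustration only):
//   x2 == 0                -> load-and-test (LTR/LTGR), no immediate needed
//   x2 == 42   (simm8)     -> CIJ/CGIJ family, if the target is near and the
//                             compare-and-branch facility is available
//   x2 == 1000 (simm16)    -> CHI/CGHI
//   x2 == 100000 (simm32)  -> CFI/CLFI/CGFI/CLGFI
//   anything larger        -> load_const_optimized into Z_R0 (or Z_R1), then register compare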
1734
1735
// Generate an optimal compare and branch to the branch target.
1736
// Optimal means that a relative branch (clgrj, brc or brcl) is used if the
1737
// branch distance is short enough. Loading the target address into a
1738
// register and branching via reg is used as fallback only.
1739
//
1740
// Input:
1741
// r1 - left compare operand
1742
// r2 - right compare operand
1743
void MacroAssembler::compare_and_branch_optimized(Register r1,
1744
Register r2,
1745
Assembler::branch_condition cond,
1746
Label& branch_target,
1747
bool len64,
1748
bool has_sign) {
1749
unsigned int casenum = (len64 ? 2 : 0) + (has_sign ? 0 : 1);
1750
1751
if (branch_target.is_bound()) {
1752
address branch_addr = target(branch_target);
1753
compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign);
1754
} else {
1755
if (VM_Version::has_CompareBranch() && branch_target.is_near()) {
1756
switch (casenum) {
1757
case 0: z_crj( r1, r2, cond, branch_target); break;
1758
case 1: z_clrj( r1, r2, cond, branch_target); break;
1759
case 2: z_cgrj( r1, r2, cond, branch_target); break;
1760
case 3: z_clgrj(r1, r2, cond, branch_target); break;
1761
default: ShouldNotReachHere(); break;
1762
}
1763
} else {
1764
switch (casenum) {
1765
case 0: z_cr( r1, r2); break;
1766
case 1: z_clr(r1, r2); break;
1767
case 2: z_cgr(r1, r2); break;
1768
case 3: z_clgr(r1, r2); break;
1769
default: ShouldNotReachHere(); break;
1770
}
1771
branch_optimized(cond, branch_target);
1772
}
1773
}
1774
}
1775
1776
//===========================================================================
1777
//=== END H I G H E R L E V E L B R A N C H E M I T T E R S ===
1778
//===========================================================================
1779
1780
AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
1781
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1782
int index = oop_recorder()->allocate_metadata_index(obj);
1783
RelocationHolder rspec = metadata_Relocation::spec(index);
1784
return AddressLiteral((address)obj, rspec);
1785
}
1786
1787
AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
1788
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1789
int index = oop_recorder()->find_index(obj);
1790
RelocationHolder rspec = metadata_Relocation::spec(index);
1791
return AddressLiteral((address)obj, rspec);
1792
}
1793
1794
AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
1795
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1796
int oop_index = oop_recorder()->allocate_oop_index(obj);
1797
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1798
}
1799
1800
AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
1801
assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
1802
int oop_index = oop_recorder()->find_index(obj);
1803
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
1804
}
1805
1806
// NOTE: destroys r
1807
void MacroAssembler::c2bool(Register r, Register t) {
1808
z_lcr(t, r); // t = -r
1809
z_or(r, t); // r = -r OR r
1810
z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise.
1811
}
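// c2bool, worked example (illustration only):
//   r = 5:  t = -5 = 0xFFFFFFFB; r | t = 0xFFFFFFFF; logical >> 31 = 1
//   r = 0:  t = 0;               r | t = 0x00000000; logical >> 31 = 0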
1812
1813
// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
1814
// and return the resulting instruction.
1815
// Dest_pos and inst_pos are 32-bit only. These parameters can only designate
1816
// relative positions.
1817
// Use correct argument types. Do not pre-calculate distance.
1818
unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) {
1819
int c = 0;
1820
unsigned long patched_inst = 0;
1821
if (is_call_pcrelative_short(inst) ||
1822
is_branch_pcrelative_short(inst) ||
1823
is_branchoncount_pcrelative_short(inst) ||
1824
is_branchonindex32_pcrelative_short(inst)) {
1825
c = 1;
1826
int m = fmask(15, 0); // simm16(-1, 16, 32);
1827
int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32);
1828
patched_inst = (inst & ~m) | v;
1829
} else if (is_compareandbranch_pcrelative_short(inst)) {
1830
c = 2;
1831
long m = fmask(31, 16); // simm16(-1, 16, 48);
1832
long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
1833
patched_inst = (inst & ~m) | v;
1834
} else if (is_branchonindex64_pcrelative_short(inst)) {
1835
c = 3;
1836
long m = fmask(31, 16); // simm16(-1, 16, 48);
1837
long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48);
1838
patched_inst = (inst & ~m) | v;
1839
} else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) {
1840
c = 4;
1841
long m = fmask(31, 0); // simm32(-1, 16, 48);
1842
long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
1843
patched_inst = (inst & ~m) | v;
1844
} else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions.
1845
c = 5;
1846
long m = fmask(31, 0); // simm32(-1, 16, 48);
1847
long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48);
1848
patched_inst = (inst & ~m) | v;
1849
} else {
1850
print_dbg_msg(tty, inst, "not a relative branch", 0);
1851
dump_code_range(tty, inst_pos, 32, "not a pcrelative branch");
1852
ShouldNotReachHere();
1853
}
1854
1855
long new_off = get_pcrel_offset(patched_inst);
1856
if (new_off != (dest_pos-inst_pos)) {
1857
tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off);
1858
print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0);
1859
print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0);
1860
#ifdef LUCY_DBG
1861
VM_Version::z_SIGSEGV();
1862
#endif
1863
ShouldNotReachHere();
1864
}
1865
return patched_inst;
1866
}
1867
1868
// Only called when binding labels (share/vm/asm/assembler.cpp)
1869
// Pass arguments as intended. Do not pre-calculate distance.
1870
void MacroAssembler::pd_patch_instruction(address branch, address target, const char* file, int line) {
1871
unsigned long stub_inst;
1872
int inst_len = get_instruction(branch, &stub_inst);
1873
1874
set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len);
1875
}
1876
1877
1878
// Extract relative address (aka offset).
1879
// inv_simm16 works for 4-byte instructions only.
1880
// Compare-and-branch instructions are 6 bytes long and have a 16-bit offset "in the middle".
1881
long MacroAssembler::get_pcrel_offset(unsigned long inst) {
1882
1883
if (MacroAssembler::is_pcrelative_short(inst)) {
1884
if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) {
1885
return RelAddr::inv_pcrel_off16(inv_simm16(inst));
1886
} else {
1887
return RelAddr::inv_pcrel_off16(inv_simm16_48(inst));
1888
}
1889
}
1890
1891
if (MacroAssembler::is_pcrelative_long(inst)) {
1892
return RelAddr::inv_pcrel_off32(inv_simm32(inst));
1893
}
1894
1895
print_dbg_msg(tty, inst, "not a pcrelative instruction", 6);
1896
#ifdef LUCY_DBG
1897
VM_Version::z_SIGSEGV();
1898
#else
1899
ShouldNotReachHere();
1900
#endif
1901
return -1;
1902
}
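// Note: the mask test above distinguishes the 4-byte encodings (16-bit offset in
// the rightmost halfword, inv_simm16) from the 6-byte compare-and-branch encodings,
// which carry their 16-bit offset "in the middle" (inv_simm16_48).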
1903
1904
long MacroAssembler::get_pcrel_offset(address pc) {
1905
unsigned long inst;
1906
unsigned int len = get_instruction(pc, &inst);
1907
1908
#ifdef ASSERT
1909
long offset;
1910
if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) {
1911
offset = get_pcrel_offset(inst);
1912
} else {
1913
offset = -1;
1914
}
1915
1916
if (offset == -1) {
1917
dump_code_range(tty, pc, 32, "not a pcrelative instruction");
1918
#ifdef LUCY_DBG
1919
VM_Version::z_SIGSEGV();
1920
#else
1921
ShouldNotReachHere();
1922
#endif
1923
}
1924
return offset;
1925
#else
1926
return get_pcrel_offset(inst);
1927
#endif // ASSERT
1928
}
1929
1930
// Get target address from pc-relative instructions.
1931
address MacroAssembler::get_target_addr_pcrel(address pc) {
1932
assert(is_pcrelative_long(pc), "not a pcrelative instruction");
1933
return pc + get_pcrel_offset(pc);
1934
}
1935
1936
// Patch pc relative load address.
1937
void MacroAssembler::patch_target_addr_pcrel(address pc, address con) {
1938
unsigned long inst;
1939
// Offset is +/- 2**32 -> use long.
1940
ptrdiff_t distance = con - pc;
1941
1942
get_instruction(pc, &inst);
1943
1944
if (is_pcrelative_short(inst)) {
1945
*(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required.
1946
1947
// Some extra safety net.
1948
if (!RelAddr::is_in_range_of_RelAddr16(distance)) {
1949
print_dbg_msg(tty, inst, "distance out of range (16bit)", 4);
1950
dump_code_range(tty, pc, 32, "distance out of range (16bit)");
1951
guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16)");
1952
}
1953
return;
1954
}
1955
1956
if (is_pcrelative_long(inst)) {
1957
*(int *)(pc+2) = RelAddr::pcrel_off32(con, pc);
1958
1959
// Some Extra safety net.
1960
if (!RelAddr::is_in_range_of_RelAddr32(distance)) {
1961
print_dbg_msg(tty, inst, "distance out of range (32bit)", 6);
1962
dump_code_range(tty, pc, 32, "distance out of range (32bit)");
1963
guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32)");
1964
}
1965
return;
1966
}
1967
1968
guarantee(false, "not a pcrelative instruction to patch!");
1969
}
1970
1971
// "Current PC" here means the address just behind the basr instruction.
1972
address MacroAssembler::get_PC(Register result) {
1973
z_basr(result, Z_R0); // Don't branch, just save next instruction address in result.
1974
return pc();
1975
}
1976
1977
// Get current PC + offset.
1978
// Offset given in bytes, must be even!
1979
// "Current PC" here means the address of the larl instruction plus the given offset.
1980
address MacroAssembler::get_PC(Register result, int64_t offset) {
1981
address here = pc();
1982
z_larl(result, offset/2); // Save target instruction address in result.
1983
return here + offset;
1984
}
1985
1986
void MacroAssembler::instr_size(Register size, Register pc) {
1987
// Extract 2 most significant bits of current instruction.
1988
z_llgc(size, Address(pc));
1989
z_srl(size, 6);
1990
// Compute (x+3)&6 which translates 0->2, 1->4, 2->4, 3->6.
1991
z_ahi(size, 3);
1992
z_nill(size, 6);
1993
}
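// instr_size relies on the z/Architecture length code in the two leftmost bits of
// the first instruction byte: 00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes.
// The (x+3)&6 trick, step by step:
//   x=0: (0+3)&6 = 2,  x=1: (1+3)&6 = 4,  x=2: (2+3)&6 = 4,  x=3: (3+3)&6 = 6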
1994
1995
// Resize_frame with SP(new) = SP(old) - [offset].
1996
void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp)
1997
{
1998
assert_different_registers(offset, fp, Z_SP);
1999
if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); }
2000
2001
z_sgr(Z_SP, offset);
2002
z_stg(fp, _z_abi(callers_sp), Z_SP);
2003
}
2004
2005
// Resize_frame with SP(new) = [newSP] + offset.
2006
// This emitter is useful if we already have calculated a pointer
2007
// into the to-be-allocated stack space, e.g. with special alignment properties,
2008
// but need some additional space, e.g. for spilling.
2009
// newSP is the pre-calculated pointer. It must not be modified.
2010
// fp holds, or is filled with, the frame pointer.
2011
// offset is the additional increment which is added to addr to form the new SP.
2012
// Note: specify a negative value to reserve more space!
2013
// load_fp == true only indicates that fp is not pre-filled with the frame pointer.
2014
// It does not guarantee that fp contains the frame pointer at the end.
2015
void MacroAssembler::resize_frame_abs_with_offset(Register newSP, Register fp, int offset, bool load_fp) {
2016
assert_different_registers(newSP, fp, Z_SP);
2017
2018
if (load_fp) {
2019
z_lg(fp, _z_abi(callers_sp), Z_SP);
2020
}
2021
2022
add2reg(Z_SP, offset, newSP);
2023
z_stg(fp, _z_abi(callers_sp), Z_SP);
2024
}
2025
2026
// Resize_frame with SP(new) = [newSP].
2027
// load_fp == true only indicates that fp is not pre-filled with the frame pointer.
2028
// It does not guarantee that fp contains the frame pointer at the end.
2029
void MacroAssembler::resize_frame_absolute(Register newSP, Register fp, bool load_fp) {
2030
assert_different_registers(newSP, fp, Z_SP);
2031
2032
if (load_fp) {
2033
z_lg(fp, _z_abi(callers_sp), Z_SP); // need to use load/store.
2034
}
2035
2036
z_lgr(Z_SP, newSP);
2037
if (newSP != Z_R0) { // make sure we generate correct code, no matter what register newSP uses.
2038
z_stg(fp, _z_abi(callers_sp), newSP);
2039
} else {
2040
z_stg(fp, _z_abi(callers_sp), Z_SP);
2041
}
2042
}
2043
2044
// Resize_frame with SP(new) = SP(old) + offset.
2045
void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) {
2046
assert_different_registers(fp, Z_SP);
2047
2048
if (load_fp) {
2049
z_lg(fp, _z_abi(callers_sp), Z_SP);
2050
}
2051
add64(Z_SP, offset);
2052
z_stg(fp, _z_abi(callers_sp), Z_SP);
2053
}
2054
2055
void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) {
2056
#ifdef ASSERT
2057
assert_different_registers(bytes, old_sp, Z_SP);
2058
if (!copy_sp) {
2059
z_cgr(old_sp, Z_SP);
2060
asm_assert_eq("[old_sp]!=[Z_SP]", 0x211);
2061
}
2062
#endif
2063
if (copy_sp) { z_lgr(old_sp, Z_SP); }
2064
if (bytes_with_inverted_sign) {
2065
z_agr(Z_SP, bytes);
2066
} else {
2067
z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster.
2068
}
2069
z_stg(old_sp, _z_abi(callers_sp), Z_SP);
2070
}
2071
2072
unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) {
2073
long offset = Assembler::align(bytes, frame::alignment_in_bytes);
2074
assert(offset > 0, "should push a frame with positive size, size = %ld.", offset);
2075
assert(Displacement::is_validDisp(-offset), "frame size out of range, size = %ld", offset);
2076
2077
// We must not write outside the current stack bounds (given by Z_SP).
2078
// Thus, we have to first update Z_SP and then store the previous SP as stack linkage.
2079
// We rely on Z_R0 by default to be available as scratch.
2080
z_lgr(scratch, Z_SP);
2081
add2reg(Z_SP, -offset);
2082
z_stg(scratch, _z_abi(callers_sp), Z_SP);
2083
#ifdef ASSERT
2084
// Just make sure nobody uses the value in the default scratch register.
2085
// When another register is used, the caller might rely on it containing the frame pointer.
2086
if (scratch == Z_R0) {
2087
z_iihf(scratch, 0xbaadbabe);
2088
z_iilf(scratch, 0xdeadbeef);
2089
}
2090
#endif
2091
return offset;
2092
}
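// Resulting layout after push_frame(bytes, scratch), sketched:
//   Z_SP(new) = Z_SP(old) - align(bytes, frame::alignment_in_bytes)
//   [Z_SP(new) + _z_abi(callers_sp)] = Z_SP(old)   (back chain to the caller's frame)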
2093
2094
// Push a frame of size `bytes' plus abi160 on top.
2095
unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) {
2096
BLOCK_COMMENT("push_frame_abi160 {");
2097
unsigned int res = push_frame(bytes + frame::z_abi_160_size);
2098
BLOCK_COMMENT("} push_frame_abi160");
2099
return res;
2100
}
2101
2102
// Pop current C frame.
2103
void MacroAssembler::pop_frame() {
2104
BLOCK_COMMENT("pop_frame:");
2105
Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP);
2106
}
2107
2108
// Pop current C frame and restore return PC register (Z_R14).
2109
void MacroAssembler::pop_frame_restore_retPC(int frame_size_in_bytes) {
2110
BLOCK_COMMENT("pop_frame_restore_retPC:");
2111
int retPC_offset = _z_abi16(return_pc) + frame_size_in_bytes;
2112
// If possible, pop frame by add instead of load (a penny saved is a penny got :-).
2113
if (Displacement::is_validDisp(retPC_offset)) {
2114
z_lg(Z_R14, retPC_offset, Z_SP);
2115
add2reg(Z_SP, frame_size_in_bytes);
2116
} else {
2117
add2reg(Z_SP, frame_size_in_bytes);
2118
restore_return_pc();
2119
}
2120
}
2121
2122
void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) {
2123
if (allow_relocation) {
2124
call_c(entry_point);
2125
} else {
2126
call_c_static(entry_point);
2127
}
2128
}
2129
2130
void MacroAssembler::call_VM_leaf_base(address entry_point) {
2131
bool allow_relocation = true;
2132
call_VM_leaf_base(entry_point, allow_relocation);
2133
}
2134
2135
void MacroAssembler::call_VM_base(Register oop_result,
2136
Register last_java_sp,
2137
address entry_point,
2138
bool allow_relocation,
2139
bool check_exceptions) { // Defaults to true.
2140
// Allow_relocation indicates, if true, that the generated code shall
2141
// be fit for code relocation or referenced data relocation. In other
2142
// words: all addresses must be considered variable. PC-relative addressing
2143
// is not possible then.
2144
// On the other hand, if (allow_relocation == false), addresses and offsets
2145
// may be considered stable, enabling us to take advantage of some PC-relative
2146
// addressing tweaks. These might improve performance and reduce code size.
2147
2148
// Determine last_java_sp register.
2149
if (!last_java_sp->is_valid()) {
2150
last_java_sp = Z_SP; // Load Z_SP as SP.
2151
}
2152
2153
set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation);
2154
2155
// ARG1 must hold thread address.
2156
z_lgr(Z_ARG1, Z_thread);
2157
2158
address return_pc = NULL;
2159
if (allow_relocation) {
2160
return_pc = call_c(entry_point);
2161
} else {
2162
return_pc = call_c_static(entry_point);
2163
}
2164
2165
reset_last_Java_frame(allow_relocation);
2166
2167
// C++ interp handles this in the interpreter.
2168
check_and_handle_popframe(Z_thread);
2169
check_and_handle_earlyret(Z_thread);
2170
2171
// Check for pending exceptions.
2172
if (check_exceptions) {
2173
// Check for pending exceptions (java_thread is set upon return).
2174
load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
2175
2176
// This used to conditionally jump to forward_exception; however, it is
2177
// possible, if we relocate, that the branch will not reach. So we must jump
2178
// around so we can always reach.
2179
2180
Label ok;
2181
z_bre(ok); // bcondEqual is the same as bcondZero.
2182
call_stub(StubRoutines::forward_exception_entry());
2183
bind(ok);
2184
}
2185
2186
// Get oop result if there is one and reset the value in the thread.
2187
if (oop_result->is_valid()) {
2188
get_vm_result(oop_result);
2189
}
2190
2191
_last_calls_return_pc = return_pc; // Wipe out other (error handling) calls.
2192
}
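// call_VM_base protocol, summarized (derived from the code above):
//   1. pick last_java_sp (defaults to Z_SP) and record the top Java frame
//   2. Z_ARG1 <- Z_thread, then call the C entry point (relocatable or static)
//   3. reset the last Java frame, handle popframe/earlyret
//   4. optionally forward pending exceptions and fetch the oop result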
2193
2194
void MacroAssembler::call_VM_base(Register oop_result,
2195
Register last_java_sp,
2196
address entry_point,
2197
bool check_exceptions) { // Defaults to true.
2198
bool allow_relocation = true;
2199
call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions);
2200
}
2201
2202
// VM calls without explicit last_java_sp.
2203
2204
void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
2205
// Call takes possible detour via InterpreterMacroAssembler.
2206
call_VM_base(oop_result, noreg, entry_point, true, check_exceptions);
2207
}
2208
2209
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
2210
// Z_ARG1 is reserved for the thread.
2211
lgr_if_needed(Z_ARG2, arg_1);
2212
call_VM(oop_result, entry_point, check_exceptions);
2213
}
2214
2215
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
2216
// Z_ARG1 is reserved for the thread.
2217
lgr_if_needed(Z_ARG2, arg_1);
2218
assert(arg_2 != Z_ARG2, "smashed argument");
2219
lgr_if_needed(Z_ARG3, arg_2);
2220
call_VM(oop_result, entry_point, check_exceptions);
2221
}
2222
2223
void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
2224
Register arg_3, bool check_exceptions) {
2225
// Z_ARG1 is reserved for the thread.
2226
lgr_if_needed(Z_ARG2, arg_1);
2227
assert(arg_2 != Z_ARG2, "smashed argument");
2228
lgr_if_needed(Z_ARG3, arg_2);
2229
assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
2230
lgr_if_needed(Z_ARG4, arg_3);
2231
call_VM(oop_result, entry_point, check_exceptions);
2232
}
2233
2234
// VM static calls without explicit last_java_sp.
2235
2236
void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) {
2237
// Call takes possible detour via InterpreterMacroAssembler.
2238
call_VM_base(oop_result, noreg, entry_point, false, check_exceptions);
2239
}
2240
2241
void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2,
2242
Register arg_3, bool check_exceptions) {
2243
// Z_ARG1 is reserved for the thread.
2244
lgr_if_needed(Z_ARG2, arg_1);
2245
assert(arg_2 != Z_ARG2, "smashed argument");
2246
lgr_if_needed(Z_ARG3, arg_2);
2247
assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
2248
lgr_if_needed(Z_ARG4, arg_3);
2249
call_VM_static(oop_result, entry_point, check_exceptions);
2250
}
2251
2252
// VM calls with explicit last_java_sp.
2253
2254
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) {
2255
// Call takes possible detour via InterpreterMacroAssembler.
2256
call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions);
2257
}
2258
2259
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
2260
// Z_ARG1 is reserved for the thread.
2261
lgr_if_needed(Z_ARG2, arg_1);
2262
call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2263
}
2264
2265
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
2266
Register arg_2, bool check_exceptions) {
2267
// Z_ARG1 is reserved for the thread.
2268
lgr_if_needed(Z_ARG2, arg_1);
2269
assert(arg_2 != Z_ARG2, "smashed argument");
2270
lgr_if_needed(Z_ARG3, arg_2);
2271
call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2272
}
2273
2274
void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1,
2275
Register arg_2, Register arg_3, bool check_exceptions) {
2276
// Z_ARG1 is reserved for the thread.
2277
lgr_if_needed(Z_ARG2, arg_1);
2278
assert(arg_2 != Z_ARG2, "smashed argument");
2279
lgr_if_needed(Z_ARG3, arg_2);
2280
assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument");
2281
lgr_if_needed(Z_ARG4, arg_3);
2282
call_VM(oop_result, last_java_sp, entry_point, check_exceptions);
2283
}
2284
2285
// VM leaf calls.
2286
2287
void MacroAssembler::call_VM_leaf(address entry_point) {
2288
// Call takes possible detour via InterpreterMacroAssembler.
2289
call_VM_leaf_base(entry_point, true);
2290
}
2291
2292
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
2293
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2294
call_VM_leaf(entry_point);
2295
}
2296
2297
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
2298
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2299
assert(arg_2 != Z_ARG1, "smashed argument");
2300
if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2301
call_VM_leaf(entry_point);
2302
}
2303
2304
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
2305
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2306
assert(arg_2 != Z_ARG1, "smashed argument");
2307
if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2308
assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
2309
if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
2310
call_VM_leaf(entry_point);
2311
}
2312
2313
// Static VM leaf calls.
2314
// Really static VM leaf calls are never patched.
2315
2316
void MacroAssembler::call_VM_leaf_static(address entry_point) {
2317
// Call takes possible detour via InterpreterMacroAssembler.
2318
call_VM_leaf_base(entry_point, false);
2319
}
2320
2321
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) {
2322
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2323
call_VM_leaf_static(entry_point);
2324
}
2325
2326
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) {
2327
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2328
assert(arg_2 != Z_ARG1, "smashed argument");
2329
if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2330
call_VM_leaf_static(entry_point);
2331
}
2332
2333
void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
2334
if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1);
2335
assert(arg_2 != Z_ARG1, "smashed argument");
2336
if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2);
2337
assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument");
2338
if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3);
2339
call_VM_leaf_static(entry_point);
2340
}
2341
2342
// Don't use detour via call_c(reg).
2343
address MacroAssembler::call_c(address function_entry) {
2344
load_const(Z_R1, function_entry);
2345
return call(Z_R1);
2346
}
2347
2348
// Variant for really static (non-relocatable) calls which are never patched.
2349
address MacroAssembler::call_c_static(address function_entry) {
2350
load_absolute_address(Z_R1, function_entry);
2351
#if 0 // def ASSERT
2352
// Verify that call site did not move.
2353
load_const_optimized(Z_R0, function_entry);
2354
z_cgr(Z_R1, Z_R0);
2355
z_brc(bcondEqual, 3);
2356
z_illtrap(0xba);
2357
#endif
2358
return call(Z_R1);
2359
}
2360
2361
address MacroAssembler::call_c_opt(address function_entry) {
2362
bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */);
2363
_last_calls_return_pc = success ? pc() : NULL;
2364
return _last_calls_return_pc;
2365
}
2366
2367
// Identify a call_far_patchable instruction: LARL + LG + BASR
2368
//
2369
// nop ; optionally, if required for alignment
2370
// lgrl rx,A(TOC entry) ; PC-relative access into constant pool
2371
// basr Z_R14,rx ; end of this instruction must be aligned to a word boundary
2372
//
2373
// Code pattern will eventually get patched into variant2 (see below for detection code).
2374
//
2375
bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) {
2376
address iaddr = instruction_addr;
2377
2378
// Check for the actual load instruction.
2379
if (!is_load_const_from_toc(iaddr)) { return false; }
2380
iaddr += load_const_from_toc_size();
2381
2382
// Check for the call (BASR) instruction, finally.
2383
assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch");
2384
return is_call_byregister(iaddr);
2385
}
2386
2387
// Identify a call_far_patchable instruction: BRASL
2388
//
2389
// Code pattern to suit atomic patching:
2390
// nop ; Optionally, if required for alignment.
2391
// nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer).
2392
// nop ; For code pattern detection: Prepend each BRASL with a nop.
2393
// brasl Z_R14,<reladdr> ; End of code must be 4-byte aligned !
2394
bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) {
2395
const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size());
2396
2397
// Check for correct number of leading nops.
2398
address iaddr;
2399
for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) {
2400
if (!is_z_nop(iaddr)) { return false; }
2401
}
2402
assert(iaddr == call_addr, "sanity");
2403
2404
// --> Check for call instruction.
2405
if (is_call_far_pcrelative(call_addr)) {
2406
assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch");
2407
return true;
2408
}
2409
2410
return false;
2411
}
2412
2413
// Emit a NOT mt-safely patchable 64 bit absolute call.
2414
// If toc_offset == -2, then the destination of the call (= target) is emitted
2415
// to the constant pool and a runtime_call relocation is added
2416
// to the code buffer.
2417
// If toc_offset != -2, target must already be in the constant pool at
2418
// _ctableStart+toc_offset (a caller can retrieve toc_offset
2419
// from the runtime_call relocation).
2420
// Special handling of emitting to scratch buffer when there is no constant pool.
2421
// Slightly changed code pattern. We emit an additional nop if we would
2422
// not end emitting at a word aligned address. This is to ensure
2423
// an atomically patchable displacement in brasl instructions.
2424
//
2425
// A call_far_patchable comes in different flavors:
2426
// - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register)
2427
// - LGRL(CP) / BR (address in constant pool, pc-relative access)
2428
// - BRASL (relative address of call target coded in instruction)
2429
// All flavors occupy the same amount of space. Length differences are compensated
2430
// by leading nops, such that the instruction sequence always ends at the same
2431
// byte offset. This is required to keep the return offset constant.
2432
// Furthermore, the return address (the end of the instruction sequence) is forced
2433
// to be on a 4-byte boundary. This is required for atomic patching, should we ever
2434
// need to patch the call target of the BRASL flavor.
2435
// RETURN value: false, if no constant pool entry could be allocated, true otherwise.
2436
bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) {
2437
// Get current pc and ensure word alignment for end of instr sequence.
2438
const address start_pc = pc();
2439
const intptr_t start_off = offset();
2440
assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address");
2441
const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop.
2442
const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit();
2443
const bool emit_relative_call = !emit_target_to_pool &&
2444
RelAddr::is_in_range_of_RelAddr32(dist) &&
2445
ReoptimizeCallSequences &&
2446
!code_section()->scratch_emit();
2447
2448
if (emit_relative_call) {
2449
// Add padding to get the same size as below.
2450
const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size();
2451
unsigned int current_padding;
2452
for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); }
2453
assert(current_padding == padding, "sanity");
2454
2455
// relative call: len = 2(nop) + 6 (brasl)
2456
// CodeBlob resize cannot occur in this case because
2457
// this call is emitted into pre-existing space.
2458
z_nop(); // Prepend each BRASL with a nop.
2459
z_brasl(Z_R14, target);
2460
} else {
2461
// absolute call: Get address from TOC.
2462
// len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8}
2463
if (emit_target_to_pool) {
2464
// When emitting the call for the first time, we do not need to use
2465
// the pc-relative version. It will be patched anyway, when the code
2466
// buffer is copied.
2467
// Relocation is not needed when !ReoptimizeCallSequences.
2468
relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none;
2469
AddressLiteral dest(target, rt);
2470
// Store_oop_in_toc() adds dest to the constant table. As a side effect, this kills
2471
// inst_mark(). Reset if possible.
2472
bool reset_mark = (inst_mark() == pc());
2473
tocOffset = store_oop_in_toc(dest);
2474
if (reset_mark) { set_inst_mark(); }
2475
if (tocOffset == -1) {
2476
return false; // Couldn't create constant pool entry.
2477
}
2478
}
2479
assert(offset() == start_off, "emit no code before this point!");
2480
2481
address tocPos = pc() + tocOffset;
2482
if (emit_target_to_pool) {
2483
tocPos = code()->consts()->start() + tocOffset;
2484
}
2485
load_long_pcrelative(Z_R14, tocPos);
2486
z_basr(Z_R14, Z_R14);
2487
}
2488
2489
#ifdef ASSERT
2490
// Assert that we can identify the emitted call.
2491
assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call");
2492
assert(offset() == start_off+call_far_patchable_size(), "wrong size");
2493
2494
if (emit_target_to_pool) {
2495
assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target,
2496
"wrong encoding of dest address");
2497
}
2498
#endif
2499
return true; // success
2500
}
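// Both emitted shapes end at the same code offset (see the size asserts above):
//   relative flavor: filler NOPs + NOP + BRASL  (BRASL carries a 32-bit relative target)
//   TOC flavor:      pc-relative load of the target address from the constant pool + BASR
// Keeping the end of the sequence 4-byte aligned is what makes the BRASL
// displacement atomically patchable later on.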
2501
2502
// Identify a call_far_patchable instruction.
2503
// For more detailed information see header comment of call_far_patchable.
2504
bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) {
2505
return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL
2506
is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR
2507
}
2508
2509
// Does the call_far_patchable instruction use a pc-relative encoding
2510
// of the call destination?
2511
bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) {
2512
// Variant 2 is pc-relative.
2513
return is_call_far_patchable_variant2_at(instruction_addr);
2514
}
2515
2516
bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) {
2517
// Prepend each BRASL with a nop.
2518
return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required.
2519
}
2520
2521
// Set destination address of a call_far_patchable instruction.
2522
void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) {
2523
ResourceMark rm;
2524
2525
// Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit).
2526
int code_size = MacroAssembler::call_far_patchable_size();
2527
CodeBuffer buf(instruction_addr, code_size);
2528
MacroAssembler masm(&buf);
2529
masm.call_far_patchable(dest, tocOffset);
2530
ICache::invalidate_range(instruction_addr, code_size); // Empty on z.
2531
}
2532
2533
// Get dest address of a call_far_patchable instruction.
2534
address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) {
2535
// Dynamic TOC: absolute address in constant pool.
2536
// Check variant2 first, it is more frequent.
2537
2538
// Relative address encoded in call instruction.
2539
if (is_call_far_patchable_variant2_at(instruction_addr)) {
2540
return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop.
2541
2542
// Absolute address in constant pool.
2543
} else if (is_call_far_patchable_variant0_at(instruction_addr)) {
2544
address iaddr = instruction_addr;
2545
2546
long tocOffset = get_load_const_from_toc_offset(iaddr);
2547
address tocLoc = iaddr + tocOffset;
2548
return *(address *)(tocLoc);
2549
} else {
2550
fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr);
2551
fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n",
2552
*(unsigned long*)instruction_addr,
2553
*(unsigned long*)(instruction_addr+8),
2554
call_far_patchable_size());
2555
Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size());
2556
ShouldNotReachHere();
2557
return NULL;
2558
}
2559
}
2560
2561
void MacroAssembler::align_call_far_patchable(address pc) {
2562
if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); }
2563
}
2564
2565
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
2566
}
2567
2568
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
2569
}
2570
2571
// Read from the polling page.
2572
// Use TM or TMY instruction, depending on read offset.
2573
// offset = 0: Use TM, safepoint polling.
2574
// offset < 0: Use TMY, profiling safepoint polling.
2575
void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) {
2576
if (Immediate::is_uimm12(offset)) {
2577
z_tm(offset, polling_page_address, mask_safepoint);
2578
} else {
2579
z_tmy(offset, polling_page_address, mask_profiling);
2580
}
2581
}
2582
2583
// Check whether z_instruction is a read access to the polling page
2584
// which was emitted by load_from_polling_page(..).
2585
bool MacroAssembler::is_load_from_polling_page(address instr_loc) {
2586
unsigned long z_instruction;
2587
unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2588
2589
if (ilen == 2) { return false; } // It's none of the allowed instructions.
2590
2591
if (ilen == 4) {
2592
if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail.
2593
2594
int ms = inv_mask(z_instruction,8,32); // mask
2595
int ra = inv_reg(z_instruction,16,32); // base register
2596
int ds = inv_uimm12(z_instruction); // displacement
2597
2598
if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) {
2599
return false; // It's not a z_tm(0, ra, mask_safepoint). Fail.
2600
}
2601
2602
} else { /* if (ilen == 6) */
2603
2604
assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y).");
2605
2606
if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail.
2607
2608
int ms = inv_mask(z_instruction,8,48); // mask
2609
int ra = inv_reg(z_instruction,16,48); // base register
2610
int ds = inv_simm20(z_instruction); // displacement
2611
}
2612
2613
return true;
2614
}
2615
2616
// Extract poll address from instruction and ucontext.
2617
address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) {
2618
assert(ucontext != NULL, "must have ucontext");
2619
ucontext_t* uc = (ucontext_t*) ucontext;
2620
unsigned long z_instruction;
2621
unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2622
2623
if (ilen == 4 && is_z_tm(z_instruction)) {
2624
int ra = inv_reg(z_instruction, 16, 32); // base register
2625
int ds = inv_uimm12(z_instruction); // displacement
2626
address addr = (address)uc->uc_mcontext.gregs[ra];
2627
return addr + ds;
2628
} else if (ilen == 6 && is_z_tmy(z_instruction)) {
2629
int ra = inv_reg(z_instruction, 16, 48); // base register
2630
int ds = inv_simm20(z_instruction); // displacement
2631
address addr = (address)uc->uc_mcontext.gregs[ra];
2632
return addr + ds;
2633
}
2634
2635
ShouldNotReachHere();
2636
return NULL;
2637
}
2638
2639
// Extract poll register from instruction.
2640
uint MacroAssembler::get_poll_register(address instr_loc) {
2641
unsigned long z_instruction;
2642
unsigned int ilen = get_instruction(instr_loc, &z_instruction);
2643
2644
if (ilen == 4 && is_z_tm(z_instruction)) {
2645
return (uint)inv_reg(z_instruction, 16, 32); // base register
2646
} else if (ilen == 6 && is_z_tmy(z_instruction)) {
2647
return (uint)inv_reg(z_instruction, 16, 48); // base register
2648
}
2649
2650
ShouldNotReachHere();
2651
return 0;
2652
}
2653
2654
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
2655
const Address poll_byte_addr(Z_thread, in_bytes(JavaThread::polling_word_offset()) + 7 /* Big Endian */);
2656
// Armed page has poll_bit set.
2657
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
2658
z_brnaz(slow_path);
2659
}
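// The "+ 7" above addresses the least-significant byte of the 8-byte polling word
// (s390 is big-endian), which is where the tested poll bit lives.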
2660
2661
// Don't rely on register locking, always use Z_R1 as scratch register instead.
2662
void MacroAssembler::bang_stack_with_offset(int offset) {
2663
// Stack grows down, caller passes positive offset.
2664
assert(offset > 0, "must bang with positive offset");
2665
if (Displacement::is_validDisp(-offset)) {
2666
z_tmy(-offset, Z_SP, mask_stackbang);
2667
} else {
2668
add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!!
2669
z_tm(0, Z_R1, mask_stackbang); // Just banging.
2670
}
2671
}
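// Illustration only: bang_stack_with_offset(os::vm_page_size()) probes one page
// below Z_SP. Offsets too large for a direct displacement take the
// add2reg + z_tm fallback, using Z_R1 as scratch (hence the note above).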
2672
2673
void MacroAssembler::reserved_stack_check(Register return_pc) {
2674
// Test if reserved zone needs to be enabled.
2675
Label no_reserved_zone_enabling;
2676
assert(return_pc == Z_R14, "Return pc must be in R14 before z_br() to StackOverflow stub.");
2677
BLOCK_COMMENT("reserved_stack_check {");
2678
2679
z_clg(Z_SP, Address(Z_thread, JavaThread::reserved_stack_activation_offset()));
2680
z_brl(no_reserved_zone_enabling);
2681
2682
// Enable reserved zone again, throw stack overflow exception.
2683
save_return_pc();
2684
push_frame_abi160(0);
2685
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), Z_thread);
2686
pop_frame();
2687
restore_return_pc();
2688
2689
load_const_optimized(Z_R1, StubRoutines::throw_delayed_StackOverflowError_entry());
2690
// Don't use call() or z_basr(), they will invalidate Z_R14 which contains the return pc.
2691
z_br(Z_R1);
2692
2693
should_not_reach_here();
2694
2695
bind(no_reserved_zone_enabling);
2696
BLOCK_COMMENT("} reserved_stack_check");
2697
}
2698
2699
// Defines obj, preserves var_size_in_bytes, okay for t1 == var_size_in_bytes.
2700
void MacroAssembler::tlab_allocate(Register obj,
2701
Register var_size_in_bytes,
2702
int con_size_in_bytes,
2703
Register t1,
2704
Label& slow_case) {
2705
assert_different_registers(obj, var_size_in_bytes, t1);
2706
Register end = t1;
2707
Register thread = Z_thread;
2708
2709
z_lg(obj, Address(thread, JavaThread::tlab_top_offset()));
2710
if (var_size_in_bytes == noreg) {
2711
z_lay(end, Address(obj, con_size_in_bytes));
2712
} else {
2713
z_lay(end, Address(obj, var_size_in_bytes));
2714
}
2715
z_cg(end, Address(thread, JavaThread::tlab_end_offset()));
2716
branch_optimized(bcondHigh, slow_case);
2717
2718
// Update the tlab top pointer.
2719
z_stg(end, Address(thread, JavaThread::tlab_top_offset()));
2720
2721
// Recover var_size_in_bytes if necessary.
2722
if (var_size_in_bytes == end) {
2723
z_sgr(var_size_in_bytes, obj);
2724
}
2725
}
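// tlab_allocate flow, summarized: obj <- tlab_top; end <- obj + size;
// if (end > tlab_end) goto slow_case; tlab_top <- end.
// Since t1 doubles as 'end', a var_size_in_bytes that aliases t1 is recovered
// afterwards by subtracting obj again.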
2726
2727
// Emitter for interface method lookup.
2728
// input: recv_klass, intf_klass, itable_index
2729
// output: method_result
2730
// kills: itable_index, temp1_reg, Z_R0, Z_R1
2731
// TODO: Temp2_reg is unused. We may use this emitter also in the itable stubs.
2732
// If the register is still not needed, remove it.
2733
void MacroAssembler::lookup_interface_method(Register recv_klass,
2734
Register intf_klass,
2735
RegisterOrConstant itable_index,
2736
Register method_result,
2737
Register temp1_reg,
2738
Label& no_such_interface,
2739
bool return_method) {
2740
2741
const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr.
2742
const Register itable_entry_addr = Z_R1_scratch;
2743
const Register itable_interface = Z_R0_scratch;
2744
2745
BLOCK_COMMENT("lookup_interface_method {");
2746
2747
// Load start of itable entries into itable_entry_addr.
2748
z_llgf(vtable_len, Address(recv_klass, Klass::vtable_length_offset()));
2749
z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes()));
2750
2751
// Loop over all itable entries until desired interfaceOop(Rinterface) found.
2752
const int vtable_base_offset = in_bytes(Klass::vtable_start_offset());
2753
2754
add2reg_with_index(itable_entry_addr,
2755
vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(),
2756
recv_klass, vtable_len);
2757
2758
const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize;
2759
Label search;
2760
2761
bind(search);
2762
2763
// Handle IncompatibleClassChangeError.
2764
// If the entry is NULL then we've reached the end of the table
2765
// without finding the expected interface, so throw an exception.
2766
load_and_test_long(itable_interface, Address(itable_entry_addr));
2767
z_bre(no_such_interface);
2768
2769
add2reg(itable_entry_addr, itable_offset_search_inc);
2770
z_cgr(itable_interface, intf_klass);
2771
z_brne(search);
2772
2773
// Entry found and itable_entry_addr points to it, get offset of vtable for interface.
2774
if (return_method) {
2775
const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() -
2776
itableOffsetEntry::interface_offset_in_bytes()) -
2777
itable_offset_search_inc;
2778
2779
// Compute itableMethodEntry and get method and entry point.
2780
// We use addressing with index and displacement, since the formula
2781
// for computing the entry's offset has a fixed and a dynamic part,
2782
// the latter depending on the matched interface entry and on whether
2783
// the itable index has been passed as a register or as a constant value.
2784
int method_offset = itableMethodEntry::method_offset_in_bytes();
2785
// Fixed part (displacement), common operand.
2786
Register itable_offset = method_result; // Dynamic part (index register).
2787
2788
if (itable_index.is_register()) {
2789
// Compute the method's offset in that register, for the formula, see the
2790
// else-clause below.
2791
z_sllg(itable_offset, itable_index.as_register(), exact_log2(itableMethodEntry::size() * wordSize));
2792
z_agf(itable_offset, vtable_offset_offset, itable_entry_addr);
2793
} else {
2794
// Displacement increases.
2795
method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant();
2796
2797
// Load index from itable.
2798
z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr);
2799
}
2800
2801
// Finally load the method's oop.
2802
z_lg(method_result, method_offset, itable_offset, recv_klass);
2803
}
2804
BLOCK_COMMENT("} lookup_interface_method");
2805
}
2806
2807
// Lookup for virtual method invocation.
2808
void MacroAssembler::lookup_virtual_method(Register recv_klass,
2809
RegisterOrConstant vtable_index,
2810
Register method_result) {
2811
assert_different_registers(recv_klass, vtable_index.register_or_noreg());
2812
assert(vtableEntry::size() * wordSize == wordSize,
2813
"else adjust the scaling in the code below");
2814
2815
BLOCK_COMMENT("lookup_virtual_method {");
2816
2817
const int base = in_bytes(Klass::vtable_start_offset());
2818
2819
if (vtable_index.is_constant()) {
2820
// Load with base + disp.
2821
Address vtable_entry_addr(recv_klass,
2822
vtable_index.as_constant() * wordSize +
2823
base +
2824
vtableEntry::method_offset_in_bytes());
2825
2826
z_lg(method_result, vtable_entry_addr);
2827
} else {
2828
// Shift index properly and load with base + index + disp.
2829
Register vindex = vtable_index.as_register();
2830
Address vtable_entry_addr(recv_klass, vindex,
2831
base + vtableEntry::method_offset_in_bytes());
2832
2833
z_sllg(vindex, vindex, exact_log2(wordSize));
2834
z_lg(method_result, vtable_entry_addr);
2835
}
2836
BLOCK_COMMENT("} lookup_virtual_method");
2837
}
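// Address computed above, written out:
//   method = [recv_klass + Klass::vtable_start_offset()
//             + vtable_index * wordSize + vtableEntry::method_offset_in_bytes()]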
2838
2839
// Factor out code to call ic_miss_handler.
2840
// Generate code to call the inline cache miss handler.
2841
//
2842
// In most cases, this code will be generated out-of-line.
2843
// The method parameters are intended to provide some variability.
2844
// ICM - Label which has to be bound to the start of useful code (past any traps).
2845
// trapMarker - Marking byte for the generated illtrap instructions (if any).
2846
// Any value except 0x00 is supported.
2847
// = 0x00 - do not generate illtrap instructions.
2848
// use nops to fill unused space.
2849
// requiredSize - required size of the generated code. If the actually
2850
// generated code is smaller, use padding instructions to fill up.
2851
// = 0 - no size requirement, no padding.
2852
// scratch - scratch register to hold branch target address.
2853
//
2854
// The method returns the code offset of the bound label.
2855
unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) {
2856
intptr_t startOffset = offset();
2857
2858
// Prevent entry at content_begin().
2859
if (trapMarker != 0) {
2860
z_illtrap(trapMarker);
2861
}
2862
2863
// Load address of inline cache miss code into scratch register
2864
// and branch to cache miss handler.
2865
BLOCK_COMMENT("IC miss handler {");
2866
BIND(ICM);
2867
unsigned int labelOffset = offset();
2868
AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
2869
2870
load_const_optimized(scratch, icmiss);
2871
z_br(scratch);
2872
2873
// Fill unused space.
2874
if (requiredSize > 0) {
2875
while ((offset() - startOffset) < requiredSize) {
2876
if (trapMarker == 0) {
2877
z_nop();
2878
} else {
2879
z_illtrap(trapMarker);
2880
}
2881
}
2882
}
2883
BLOCK_COMMENT("} IC miss handler");
2884
return labelOffset;
2885
}
2886
2887
void MacroAssembler::nmethod_UEP(Label& ic_miss) {
2888
Register ic_reg = Z_inline_cache;
2889
int klass_offset = oopDesc::klass_offset_in_bytes();
2890
if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2891
if (VM_Version::has_CompareBranch()) {
2892
z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss);
2893
} else {
2894
z_ltgr(Z_ARG1, Z_ARG1);
2895
z_bre(ic_miss);
2896
}
2897
}
2898
// Compare cached class against klass from receiver.
2899
compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false);
2900
z_brne(ic_miss);
2901
}
2902
2903
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2904
Register super_klass,
2905
Register temp1_reg,
2906
Label* L_success,
2907
Label* L_failure,
2908
Label* L_slow_path,
2909
RegisterOrConstant super_check_offset) {
2910
2911
const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2912
const int sco_offset = in_bytes(Klass::super_check_offset_offset());
2913
2914
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
2915
bool need_slow_path = (must_load_sco ||
2916
super_check_offset.constant_or_zero() == sc_offset);
2917
2918
// Input registers must not overlap.
2919
assert_different_registers(sub_klass, super_klass, temp1_reg);
2920
if (super_check_offset.is_register()) {
2921
assert_different_registers(sub_klass, super_klass,
2922
super_check_offset.as_register());
2923
} else if (must_load_sco) {
2924
assert(temp1_reg != noreg, "supply either a temp or a register offset");
2925
}
2926
2927
const Register Rsuper_check_offset = temp1_reg;
2928
2929
NearLabel L_fallthrough;
2930
int label_nulls = 0;
2931
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
2932
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
2933
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2934
assert(label_nulls <= 1 ||
2935
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
2936
"at most one NULL in the batch, usually");
2937
2938
BLOCK_COMMENT("check_klass_subtype_fast_path {");
2939
// If the pointers are equal, we are done (e.g., String[] elements).
2940
// This self-check enables sharing of secondary supertype arrays among
2941
// non-primary types such as array-of-interface. Otherwise, each such
2942
// type would need its own customized SSA.
2943
// We move this check to the front of the fast path because many
2944
// type checks are in fact trivially successful in this manner,
2945
// so we get a nicely predicted branch right at the start of the check.
2946
compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success);
2947
2948
// Check the supertype display, which is uint.
2949
if (must_load_sco) {
2950
z_llgf(Rsuper_check_offset, sco_offset, super_klass);
2951
super_check_offset = RegisterOrConstant(Rsuper_check_offset);
2952
}
2953
Address super_check_addr(sub_klass, super_check_offset, 0);
2954
z_cg(super_klass, super_check_addr); // compare w/ displayed supertype
2955
2956
// This check has worked decisively for primary supers.
2957
// Secondary supers are sought in the super_cache ('super_cache_addr').
2958
// (Secondary supers are interfaces and very deeply nested subtypes.)
2959
// This works in the same check above because of a tricky aliasing
2960
// between the super_cache and the primary super display elements.
2961
// (The 'super_check_addr' can address either, as the case requires.)
2962
// Note that the cache is updated below if it does not help us find
2963
// what we need immediately.
2964
// So if it was a primary super, we can just fail immediately.
2965
// Otherwise, it's the slow path for us (no success at this point).
2966
2967
// Hacked jmp, which may only be used just before L_fallthrough.
2968
#define final_jmp(label) \
2969
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
2970
else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/
2971
2972
if (super_check_offset.is_register()) {
2973
branch_optimized(Assembler::bcondEqual, *L_success);
2974
z_cfi(super_check_offset.as_register(), sc_offset);
2975
if (L_failure == &L_fallthrough) {
2976
branch_optimized(Assembler::bcondEqual, *L_slow_path);
2977
} else {
2978
branch_optimized(Assembler::bcondNotEqual, *L_failure);
2979
final_jmp(*L_slow_path);
2980
}
2981
} else if (super_check_offset.as_constant() == sc_offset) {
2982
// Need a slow path; fast failure is impossible.
2983
if (L_slow_path == &L_fallthrough) {
2984
branch_optimized(Assembler::bcondEqual, *L_success);
2985
} else {
2986
branch_optimized(Assembler::bcondNotEqual, *L_slow_path);
2987
final_jmp(*L_success);
2988
}
2989
} else {
2990
// No slow path; it's a fast decision.
2991
if (L_failure == &L_fallthrough) {
2992
branch_optimized(Assembler::bcondEqual, *L_success);
2993
} else {
2994
branch_optimized(Assembler::bcondNotEqual, *L_failure);
2995
final_jmp(*L_success);
2996
}
2997
}
2998
2999
bind(L_fallthrough);
3000
#undef local_brc
3001
#undef final_jmp
3002
BLOCK_COMMENT("} check_klass_subtype_fast_path");
3003
// fallthru (to slow path)
3004
}
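// For readability, the decision tree emitted above can be summarized in plain C-style
// pseudocode (illustrative sketch only, not emitted code; field names are abbreviated):
//
//   if (sub_klass == super_klass)                        return success;    // trivial self check
//   off = super_klass->_super_check_offset;              // constant, or loaded via sco_offset
//   if (*(Klass**)((address)sub_klass + off) == super_klass)
//                                                        return success;    // primary super or cache hit
//   if (off == secondary_super_cache_offset)             return slow_path;  // must scan secondary supers
//   /* otherwise */                                      return failure;    // decisive miss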
3005
3006
void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass,
3007
Register Rsuperklass,
3008
Register Rarray_ptr, // tmp
3009
Register Rlength, // tmp
3010
Label* L_success,
3011
Label* L_failure) {
3012
// Input registers must not overlap.
3013
// Also check for R1, which is explicitly used here.
3014
assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength);
3015
NearLabel L_fallthrough;
3016
int label_nulls = 0;
3017
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
3018
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
3019
assert(label_nulls <= 1, "at most one NULL in the batch");
3020
3021
const int ss_offset = in_bytes(Klass::secondary_supers_offset());
3022
const int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3023
3024
const int length_offset = Array<Klass*>::length_offset_in_bytes();
3025
const int base_offset = Array<Klass*>::base_offset_in_bytes();
3026
3027
// Hacked jmp, which may only be used just before L_fallthrough.
3028
#define final_jmp(label) \
3029
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3030
else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/
3031
3032
NearLabel loop_iterate, loop_count, match;
3033
3034
BLOCK_COMMENT("check_klass_subtype_slow_path {");
3035
z_lg(Rarray_ptr, ss_offset, Rsubklass);
3036
3037
load_and_test_int(Rlength, Address(Rarray_ptr, length_offset));
3038
branch_optimized(Assembler::bcondZero, *L_failure);
3039
3040
// Entries in the table are no longer compressed.
3041
z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match.
3042
z_bre(match); // Shortcut for array length = 1.
3043
3044
// No match yet, so we must walk the array's elements.
3045
z_lngfr(Rlength, Rlength);
3046
z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array
3047
z_llill(Z_R1, BytesPerWord); // Set increment/end index.
3048
add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord
3049
z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord
3050
z_bru(loop_count);
3051
3052
BIND(loop_iterate);
3053
z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match.
3054
z_bre(match);
3055
BIND(loop_count);
3056
z_brxlg(Rlength, Z_R1, loop_iterate);
3057
3058
// Rsuperklass not found among secondary super classes -> failure.
3059
branch_optimized(Assembler::bcondAlways, *L_failure);
3060
3061
// Got a hit. Return success (zero result). Set cache.
3062
// Cache load doesn't happen here. For speed it is directly emitted by the compiler.
3063
3064
BIND(match);
3065
3066
z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache.
3067
3068
final_jmp(*L_success);
3069
3070
// Exit to the surrounding code.
3071
BIND(L_fallthrough);
3072
#undef local_brc
3073
#undef final_jmp
3074
BLOCK_COMMENT("} check_klass_subtype_slow_path");
3075
}
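// The scan above is equivalent to the following plain C++ loop (illustrative sketch only,
// not emitted code; field names are illustrative):
//
//   Array<Klass*>* ss = sub_klass->_secondary_supers;
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super_klass) {
//       sub_klass->_secondary_super_cache = super_klass;  // remember the hit for next time
//       return success;
//     }
//   }
//   return failure;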
3076
3077
// Emitter for combining fast and slow path.
3078
void MacroAssembler::check_klass_subtype(Register sub_klass,
3079
Register super_klass,
3080
Register temp1_reg,
3081
Register temp2_reg,
3082
Label& L_success) {
3083
NearLabel failure;
3084
BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name()));
3085
check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg,
3086
&L_success, &failure, NULL);
3087
check_klass_subtype_slow_path(sub_klass, super_klass,
3088
temp1_reg, temp2_reg, &L_success, NULL);
3089
BIND(failure);
3090
BLOCK_COMMENT("} check_klass_subtype");
3091
}
3092
3093
void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
3094
assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
3095
3096
Label L_fallthrough;
3097
if (L_fast_path == NULL) {
3098
L_fast_path = &L_fallthrough;
3099
} else if (L_slow_path == NULL) {
3100
L_slow_path = &L_fallthrough;
3101
}
3102
3103
// Fast path check: class is fully initialized
3104
z_cli(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3105
z_bre(*L_fast_path);
3106
3107
// Fast path check: current thread is initializer thread
3108
z_cg(thread, Address(klass, InstanceKlass::init_thread_offset()));
3109
if (L_slow_path == &L_fallthrough) {
3110
z_bre(*L_fast_path);
3111
} else if (L_fast_path == &L_fallthrough) {
3112
z_brne(*L_slow_path);
3113
} else {
3114
Unimplemented();
3115
}
3116
3117
bind(L_fallthrough);
3118
}
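// The barrier above implements the following check (illustrative sketch only, not emitted code):
//
//   if (klass->_init_state == InstanceKlass::fully_initialized) goto fast_path;
//   if (klass->_init_thread == current_thread)                  goto fast_path;  // initializer may use it
//   goto slow_path;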
3119
3120
// Increment a counter at counter_address when the eq condition code is
3121
// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
3122
void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) {
3123
Label l;
3124
z_brne(l);
3125
load_const(tmp1_reg, counter_address);
3126
add2mem_32(Address(tmp1_reg), 1, tmp2_reg);
3127
z_cr(tmp1_reg, tmp1_reg); // Set cc to eq.
3128
bind(l);
3129
}
3130
3131
// Semantics are dependent on the slow_case label:
3132
// If the slow_case label is not NULL, failure to biased-lock the object
3133
// transfers control to the location of the slow_case label. If the
3134
// object could be biased-locked, control is transferred to the done label.
3135
// The condition code is unpredictable.
3136
//
3137
// If the slow_case label is NULL, failure to biased-lock the object results
3138
// in a transfer of control to the done label with a condition code of not_equal.
3139
// If the biased-lock could be successfully obtained, control is transferred to
3140
// the done label with a condition code of equal.
3141
// It is mandatory to react to the condition code at the done label.
3142
//
3143
void MacroAssembler::biased_locking_enter(Register obj_reg,
3144
Register mark_reg,
3145
Register temp_reg,
3146
Register temp2_reg, // May be Z_R0!
3147
Label &done,
3148
Label *slow_case) {
3149
assert(UseBiasedLocking, "why call this otherwise?");
3150
assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg);
3151
3152
Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise.
3153
3154
BLOCK_COMMENT("biased_locking_enter {");
3155
3156
// Biased locking
3157
// See whether the lock is currently biased toward our thread and
3158
// whether the epoch is still valid.
3159
// Note that the runtime guarantees sufficient alignment of JavaThread
3160
// pointers to allow age to be placed into low bits.
3161
assert(markWord::age_shift == markWord::lock_bits + markWord::biased_lock_bits,
3162
"biased locking makes assumptions about bit layout");
3163
z_lr(temp_reg, mark_reg);
3164
z_nilf(temp_reg, markWord::biased_lock_mask_in_place);
3165
z_chi(temp_reg, markWord::biased_lock_pattern);
3166
z_brne(cas_label); // Try cas if object is not biased, i.e. cannot be biased locked.
3167
3168
load_prototype_header(temp_reg, obj_reg);
3169
load_const_optimized(temp2_reg, ~((int) markWord::age_mask_in_place));
3170
3171
z_ogr(temp_reg, Z_thread);
3172
z_xgr(temp_reg, mark_reg);
3173
z_ngr(temp_reg, temp2_reg);
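// The three instructions above compute, in plain C terms (illustrative sketch only, not emitted code):
//   temp = (prototype_header | current_thread) ^ mark;  // bits that differ from the expected biased header
//   temp &= ~markWord::age_mask_in_place;               // age bits are allowed to differ
// temp == 0 (condition code 'equal') means the lock is already biased towards this thread.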
3174
if (PrintBiasedLockingStatistics) {
3175
increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg);
3176
// Restore mark_reg.
3177
z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
3178
}
3179
branch_optimized(Assembler::bcondEqual, done); // Biased lock obtained, return success.
3180
3181
Label try_revoke_bias;
3182
Label try_rebias;
3183
Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes());
3184
3185
//----------------------------------------------------------------------------
3186
// At this point we know that the header has the bias pattern and
3187
// that we are not the bias owner in the current epoch. We need to
3188
// figure out more details about the state of the header in order to
3189
// know what operations can be legally performed on the object's
3190
// header.
3191
3192
// If the low three bits in the xor result aren't clear, that means
3193
// the prototype header is no longer biased and we have to revoke
3194
// the bias on this object.
3195
z_tmll(temp_reg, markWord::biased_lock_mask_in_place);
3196
z_brnaz(try_revoke_bias);
3197
3198
// Biasing is still enabled for this data type. See whether the
3199
// epoch of the current bias is still valid, meaning that the epoch
3200
// bits of the mark word are equal to the epoch bits of the
3201
// prototype header. (Note that the prototype header's epoch bits
3202
// only change at a safepoint.) If not, attempt to rebias the object
3203
// toward the current thread. Note that we must be absolutely sure
3204
// that the current epoch is invalid in order to do this because
3205
// otherwise the manipulations it performs on the mark word are
3206
// illegal.
3207
z_tmll(temp_reg, markWord::epoch_mask_in_place);
3208
z_brnaz(try_rebias);
3209
3210
//----------------------------------------------------------------------------
3211
// The epoch of the current bias is still valid but we know nothing
3212
// about the owner; it might be set or it might be clear. Try to
3213
// acquire the bias of the object using an atomic operation. If this
3214
// fails we will go in to the runtime to revoke the object's bias.
3215
// Note that we first construct the presumed unbiased header so we
3216
// don't accidentally blow away another thread's valid bias.
3217
z_nilf(mark_reg, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place |
3218
markWord::epoch_mask_in_place);
3219
z_lgr(temp_reg, Z_thread);
3220
z_llgfr(mark_reg, mark_reg);
3221
z_ogr(temp_reg, mark_reg);
3222
3223
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3224
3225
z_csg(mark_reg, temp_reg, 0, obj_reg);
3226
3227
// If the biasing toward our thread failed, this means that
3228
// another thread succeeded in biasing it toward itself and we
3229
// need to revoke that bias. The revocation will occur in the
3230
// interpreter runtime in the slow case.
3231
3232
if (PrintBiasedLockingStatistics) {
3233
increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(),
3234
temp_reg, temp2_reg);
3235
}
3236
if (slow_case != NULL) {
3237
branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
3238
}
3239
branch_optimized(Assembler::bcondAlways, done); // Biased lock status given in condition code.
3240
3241
//----------------------------------------------------------------------------
3242
bind(try_rebias);
3243
// At this point we know the epoch has expired, meaning that the
3244
// current "bias owner", if any, is actually invalid. Under these
3245
// circumstances _only_, we are allowed to use the current header's
3246
// value as the comparison value when doing the cas to acquire the
3247
// bias in the current epoch. In other words, we allow transfer of
3248
// the bias from one thread to another directly in this situation.
3249
3250
z_nilf(mark_reg, markWord::biased_lock_mask_in_place | markWord::age_mask_in_place | markWord::epoch_mask_in_place);
3251
load_prototype_header(temp_reg, obj_reg);
3252
z_llgfr(mark_reg, mark_reg);
3253
3254
z_ogr(temp_reg, Z_thread);
3255
3256
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3257
3258
z_csg(mark_reg, temp_reg, 0, obj_reg);
3259
3260
// If the biasing toward our thread failed, this means that
3261
// another thread succeeded in biasing it toward itself and we
3262
// need to revoke that bias. The revocation will occur in the
3263
// interpreter runtime in the slow case.
3264
3265
if (PrintBiasedLockingStatistics) {
3266
increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg);
3267
}
3268
if (slow_case != NULL) {
3269
branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way.
3270
}
3271
z_bru(done); // Biased lock status given in condition code.
3272
3273
//----------------------------------------------------------------------------
3274
bind(try_revoke_bias);
3275
// The prototype mark in the klass doesn't have the bias bit set any
3276
// more, indicating that objects of this data type are not supposed
3277
// to be biased any more. We are going to try to reset the mark of
3278
// this object to the prototype value and fall through to the
3279
// CAS-based locking scheme. Note that if our CAS fails, it means
3280
// that another thread raced us for the privilege of revoking the
3281
// bias of this particular object, so it's okay to continue in the
3282
// normal locking code.
3283
load_prototype_header(temp_reg, obj_reg);
3284
3285
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3286
3287
z_csg(mark_reg, temp_reg, 0, obj_reg);
3288
3289
// Fall through to the normal CAS-based lock, because no matter what
3290
// the result of the above CAS, some thread must have succeeded in
3291
// removing the bias bit from the object's header.
3292
if (PrintBiasedLockingStatistics) {
3293
// z_cgr(mark_reg, temp2_reg);
3294
increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg);
3295
}
3296
3297
bind(cas_label);
3298
BLOCK_COMMENT("} biased_locking_enter");
3299
}
3300
3301
void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) {
3302
// Check for biased locking unlock case, which is a no-op
3303
// Note: we do not have to check the thread ID for two reasons.
3304
// First, the interpreter checks for IllegalMonitorStateException at
3305
// a higher level. Second, if the bias was revoked while we held the
3306
// lock, the object could not be rebiased toward another thread, so
3307
// the bias bit would be clear.
3308
BLOCK_COMMENT("biased_locking_exit {");
3309
3310
z_lg(temp_reg, 0, mark_addr);
3311
z_nilf(temp_reg, markWord::biased_lock_mask_in_place);
3312
3313
z_chi(temp_reg, markWord::biased_lock_pattern);
3314
z_bre(done);
3315
BLOCK_COMMENT("} biased_locking_exit");
3316
}
3317
3318
void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
3319
Register displacedHeader = temp1;
3320
Register currentHeader = temp1;
3321
Register temp = temp2;
3322
NearLabel done, object_has_monitor;
3323
3324
BLOCK_COMMENT("compiler_fast_lock_object {");
3325
3326
// Load markWord from oop into mark.
3327
z_lg(displacedHeader, 0, oop);
3328
3329
if (DiagnoseSyncOnValueBasedClasses != 0) {
3330
load_klass(Z_R1_scratch, oop);
3331
z_l(Z_R1_scratch, Address(Z_R1_scratch, Klass::access_flags_offset()));
3332
assert((JVM_ACC_IS_VALUE_BASED_CLASS & 0xFFFF) == 0, "or change following instruction");
3333
z_nilh(Z_R1_scratch, JVM_ACC_IS_VALUE_BASED_CLASS >> 16);
3334
z_brne(done);
3335
}
3336
3337
if (try_bias) {
3338
biased_locking_enter(oop, displacedHeader, temp, Z_R0, done);
3339
}
3340
3341
// Handle existing monitor.
3342
// The object has an existing monitor iff (mark & monitor_value) != 0.
3343
guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word");
3344
z_lr(temp, displacedHeader);
3345
z_nill(temp, markWord::monitor_value);
3346
z_brne(object_has_monitor);
3347
3348
// Set mark to markWord | markWord::unlocked_value.
3349
z_oill(displacedHeader, markWord::unlocked_value);
3350
3351
// Load Compare Value application register.
3352
3353
// Initialize the box (must happen before we update the object mark).
3354
z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box);
3355
3356
// Memory Fence (in cmpxchgd)
3357
// Compare object markWord with mark and if equal exchange scratch1 with object markWord.
3358
3359
// If the compare-and-swap succeeded, then we found an unlocked object and we
3360
// have now locked it.
3361
z_csg(displacedHeader, box, 0, oop);
3362
assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture.
3363
z_bre(done);
3364
3365
// We did not see an unlocked object so try the fast recursive case.
3366
3367
z_sgr(currentHeader, Z_SP);
3368
load_const_optimized(temp, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
3369
3370
z_ngr(currentHeader, temp);
3371
// z_brne(done);
3372
// z_release();
3373
z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box);
3374
3375
z_bru(done);
3376
3377
Register zero = temp;
3378
Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value.
3379
bind(object_has_monitor);
3380
// The object's monitor m is unlocked iff m->owner == NULL,
3381
// otherwise m->owner may contain a thread or a stack address.
3382
//
3383
// Try to CAS m->owner from NULL to current thread.
3384
z_lghi(zero, 0);
3385
// If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
3386
z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
3387
// Store a non-null value into the box.
3388
z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
3389
#ifdef ASSERT
3390
z_brne(done);
3391
// We've acquired the monitor, check some invariants.
3392
// Invariant 1: _recursions should be 0.
3393
asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
3394
"monitor->_recursions should be 0", -1);
3395
z_ltgr(zero, zero); // Set CR=EQ.
3396
#endif
3397
bind(done);
3398
3399
BLOCK_COMMENT("} compiler_fast_lock_object");
3400
// If locking was successful, CR should indicate 'EQ'.
3401
// The compiler or the native wrapper generates a branch to the runtime call
3402
// _complete_monitor_locking_Java.
3403
}
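// The stack-lock fast path emitted above corresponds to the following sketch (illustrative
// only, not emitted code; 'mark' denotes the header value observed by the CAS):
//
//   box->_displaced_header = mark | unlocked_value;                 // expected unlocked header
//   if (CAS(&obj->mark, mark | unlocked_value, box)) return EQ;     // locked: header now points to the box
//   // CAS failed: check for a recursive lock, i.e. the header is an address in our own stack frame.
//   tmp = (mark - SP) & (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place);
//   box->_displaced_header = tmp;                                   // 0 marks a recursive lock
//   return (tmp == 0) ? EQ : NE;                                    // NE falls back to the monitor/runtime path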
3404
3405
void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) {
3406
Register displacedHeader = temp1;
3407
Register currentHeader = temp2;
3408
Register temp = temp1;
3409
Register monitor = temp2;
3410
3411
Label done, object_has_monitor;
3412
3413
BLOCK_COMMENT("compiler_fast_unlock_object {");
3414
3415
if (try_bias) {
3416
biased_locking_exit(oop, currentHeader, done);
3417
}
3418
3419
// Find the lock address and load the displaced header from the stack.
3420
// If the displaced header is zero, we have a recursive unlock.
3421
load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3422
z_bre(done);
3423
3424
// Handle existing monitor.
3425
// The object has an existing monitor iff (mark & monitor_value) != 0.
3426
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
3427
guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word");
3428
z_nill(currentHeader, markWord::monitor_value);
3429
z_brne(object_has_monitor);
3430
3431
// Check if it is still a lightweight lock; this is true if we see
// the stack address of the BasicLock in the markWord of the object.
// Copy box to currentHeader so that csg does not kill it.
3434
z_lgr(currentHeader, box);
3435
z_csg(currentHeader, displacedHeader, 0, oop);
3436
z_bru(done); // Csg sets CR as desired.
3437
3438
// Handle existing monitor.
3439
bind(object_has_monitor);
3440
z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
3441
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
3442
z_brne(done);
3443
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
3444
z_brne(done);
3445
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
3446
z_brne(done);
3447
load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
3448
z_brne(done);
3449
z_release();
3450
z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
3451
3452
bind(done);
3453
3454
BLOCK_COMMENT("} compiler_fast_unlock_object");
3455
// flag == EQ indicates success
3456
// flag == NE indicates failure
3457
}
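// The stack-lock part of the fast unlock above corresponds to this sketch (illustrative only,
// not emitted code; the inflated-monitor path is handled separately above):
//
//   displaced = box->_displaced_header;
//   if (displaced == 0)                    return EQ;  // recursive unlock, nothing to restore
//   if (CAS(&obj->mark, box, displaced))   return EQ;  // restore the displaced header
//   return NE;                                          // contended/inflated -> runtime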
3458
3459
void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
3460
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3461
bs->resolve_jobject(this, value, tmp1, tmp2);
3462
}
3463
3464
// Last_Java_sp must comply to the rules in frame_s390.hpp.
3465
void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) {
3466
BLOCK_COMMENT("set_last_Java_frame {");
3467
3468
// Always set last_Java_pc and flags first because once last_Java_sp
// is visible, has_last_Java_frame is true and users will look at the
// rest of the fields. (Note: flags should always be zero before we
// get here, so it doesn't need to be set.)
3472
3473
// Verify that last_Java_pc was zeroed on return to Java.
3474
if (allow_relocation) {
3475
asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()),
3476
Z_thread,
3477
"last_Java_pc not zeroed before leaving Java",
3478
0x200);
3479
} else {
3480
asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()),
3481
Z_thread,
3482
"last_Java_pc not zeroed before leaving Java",
3483
0x200);
3484
}
3485
3486
// When returning from calling out from Java mode the frame anchor's
// last_Java_pc will always be set to NULL. It is set here so that,
// if we are doing a call to native (not VM), we capture the
// known pc and don't have to rely on the native call having a
// standard frame linkage where we can find the pc.
3491
if (last_Java_pc!=noreg) {
3492
z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset()));
3493
}
3494
3495
// This membar release is not required on z/Architecture, since the sequence of stores
3496
// is maintained. Nevertheless, we leave it in to document the required ordering.
3497
// The implementation of z_release() should be empty.
3498
// z_release();
3499
3500
z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset()));
3501
BLOCK_COMMENT("} set_last_Java_frame");
3502
}
3503
3504
void MacroAssembler::reset_last_Java_frame(bool allow_relocation) {
3505
BLOCK_COMMENT("reset_last_Java_frame {");
3506
3507
if (allow_relocation) {
3508
asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
3509
Z_thread,
3510
"SP was not set, still zero",
3511
0x202);
3512
} else {
3513
asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()),
3514
Z_thread,
3515
"SP was not set, still zero",
3516
0x202);
3517
}
3518
3519
// _last_Java_sp = 0
3520
// Clearing storage must be atomic here, so don't use clear_mem()!
3521
store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0);
3522
3523
// _last_Java_pc = 0
3524
store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0);
3525
3526
BLOCK_COMMENT("} reset_last_Java_frame");
3527
return;
3528
}
3529
3530
void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) {
3531
assert_different_registers(sp, tmp1);
3532
3533
// We cannot trust that code generated by the C++ compiler saves R14
3534
// to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
3535
// z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
3536
// Therefore we load the PC into tmp1 and let set_last_Java_frame() save
3537
// it into the frame anchor.
3538
get_PC(tmp1);
3539
set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation);
3540
}
3541
3542
void MacroAssembler::set_thread_state(JavaThreadState new_state) {
3543
z_release();
3544
3545
assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction");
3546
assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int");
3547
store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false);
3548
}
3549
3550
void MacroAssembler::get_vm_result(Register oop_result) {
3551
verify_thread();
3552
3553
z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
3554
clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*));
3555
3556
verify_oop(oop_result, FILE_AND_LINE);
3557
}
3558
3559
void MacroAssembler::get_vm_result_2(Register result) {
3560
verify_thread();
3561
3562
z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset()));
3563
clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*));
3564
}
3565
3566
// We require that C code which does not return a value in vm_result will
3567
// leave it undisturbed.
3568
void MacroAssembler::set_vm_result(Register oop_result) {
3569
z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset()));
3570
}
3571
3572
// Explicit null checks (used for method handle code).
3573
void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) {
3574
if (!ImplicitNullChecks) {
3575
NearLabel ok;
3576
3577
compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok);
3578
3579
// We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address).
3580
address exception_entry = Interpreter::throw_NullPointerException_entry();
3581
load_absolute_address(reg, exception_entry);
3582
z_br(reg);
3583
3584
bind(ok);
3585
} else {
3586
if (needs_explicit_null_check((intptr_t)offset)) {
3587
// Provoke OS NULL exception if reg = NULL by
3588
// accessing M[reg] w/o changing any registers.
3589
z_lg(tmp, 0, reg);
3590
}
3591
// else
3592
// Nothing to do, (later) access of M[reg + offset]
3593
// will provoke OS NULL exception if reg = NULL.
3594
}
3595
}
3596
3597
//-------------------------------------
3598
// Compressed Klass Pointers
3599
//-------------------------------------
3600
3601
// Klass oop manipulations if compressed.
3602
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3603
Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible.
3604
address base = CompressedKlassPointers::base();
3605
int shift = CompressedKlassPointers::shift();
3606
bool need_zero_extend = base != 0;
3607
assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3608
3609
BLOCK_COMMENT("cKlass encoder {");
3610
3611
#ifdef ASSERT
3612
Label ok;
3613
z_tmll(current, KlassAlignmentInBytes-1); // Check alignment.
3614
z_brc(Assembler::bcondAllZero, ok);
3615
// The plain disassembler does not recognize illtrap. It instead displays
3616
// a 32-bit value. Issuing two illtraps ensures that the disassembler finds
3617
// the proper beginning of the next instruction.
3618
z_illtrap(0xee);
3619
z_illtrap(0xee);
3620
bind(ok);
3621
#endif
3622
3623
// Scale down the incoming klass pointer first.
3624
// We then can be sure we calculate an offset that fits into 32 bit.
3625
// More generally speaking: all subsequent calculations are purely 32-bit.
3626
if (shift != 0) {
3627
assert (LogKlassAlignmentInBytes == shift, "decode alg wrong");
3628
z_srlg(dst, current, shift);
3629
current = dst;
3630
}
3631
3632
if (base != NULL) {
3633
// Use scaled-down base address parts to match scaled-down klass pointer.
3634
unsigned int base_h = ((unsigned long)base)>>(32+shift);
3635
unsigned int base_l = (unsigned int)(((unsigned long)base)>>shift);
3636
3637
// General considerations:
3638
// - when calculating (current_h - base_h), all digits must cancel (become 0).
3639
// Otherwise, we would end up with a compressed klass pointer which doesn't
3640
// fit into 32-bit.
3641
// - Only bit#33 of the difference could potentially be non-zero. For that
3642
// to happen, (current_l < base_l) must hold. In this case, the subtraction
3643
// will create a borrow out of bit#32, nicely killing bit#33.
3644
// - With the above, we only need to consider current_l and base_l to
3645
// calculate the result.
3646
// - Both values are treated as unsigned. The unsigned subtraction is
3647
// replaced by adding (unsigned) the 2's complement of the subtrahend.
3648
3649
if (base_l == 0) {
3650
// - In theory, the calculation to be performed here (current_h - base_h) MUST
3651
// cancel all high-word bits. Otherwise, we would end up with an offset
3652
// (i.e. compressed klass pointer) that does not fit into 32 bit.
3653
// - current_l remains unchanged.
3654
// - Therefore, we can replace all calculation with just a
3655
// zero-extending load 32 to 64 bit.
3656
// - Even that can be replaced with a conditional load if dst != current.
3657
// (this is a local view. The shift step may have requested zero-extension).
3658
} else {
3659
if ((base_h == 0) && is_uimm(base_l, 31)) {
3660
// If we happen to find that (base_h == 0), and that base_l is within the range
3661
// which can be represented by a signed int, then we can use 64bit signed add with
3662
// (-base_l) as 32bit signed immediate operand. The add will take care of the
3663
// upper 32 bits of the result, saving us the need of an extra zero extension.
3664
// For base_l to be in the required range, it must not have the most significant
3665
// bit (aka sign bit) set.
3666
lgr_if_needed(dst, current); // no zero/sign extension in this case!
3667
z_agfi(dst, -(int)base_l); // base_l must be passed as signed.
3668
need_zero_extend = false;
3669
current = dst;
3670
} else {
3671
// To begin with, we may need to copy and/or zero-extend the register operand.
3672
// We have to calculate (current_l - base_l). Because there is no unsigned
3673
// subtract instruction with immediate operand, we add the 2's complement of base_l.
3674
if (need_zero_extend) {
3675
z_llgfr(dst, current);
3676
need_zero_extend = false;
3677
} else {
3678
llgfr_if_needed(dst, current);
3679
}
3680
current = dst;
3681
z_alfi(dst, -base_l);
3682
}
3683
}
3684
}
3685
3686
if (need_zero_extend) {
3687
// We must zero-extend the calculated result. It may have some leftover bits in
3688
// the hi-word because we only did optimized calculations.
3689
z_llgfr(dst, current);
3690
} else {
3691
llgfr_if_needed(dst, current); // zero-extension while copying comes at no extra cost.
3692
}
3693
3694
BLOCK_COMMENT("} cKlass encoder");
3695
}
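// Worked example for the encoder above (hypothetical values, assuming shift = 3):
//   base  = 0x0000000800000000  ->  base  >> 3 = 0x1_00000000, i.e. base_h = 1, base_l = 0
//   klass = 0x0000000801234560  ->  klass >> 3 = 0x1_002468AC
// With base_l == 0 no arithmetic is needed: the high word of (klass >> 3) cancels against
// base_h by construction, so the compressed value is simply the low 32 bits, 0x002468AC,
// which equals (klass - base) >> 3.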
3696
3697
// This function calculates the size of the code generated by
3698
// decode_klass_not_null(register dst, Register src)
3699
// when (Universe::heap() != NULL). Hence, if the instructions
3700
// it generates change, then this method needs to be updated.
3701
int MacroAssembler::instr_size_for_decode_klass_not_null() {
3702
address base = CompressedKlassPointers::base();
3703
int shift_size = CompressedKlassPointers::shift() == 0 ? 0 : 6; /* sllg */
3704
int addbase_size = 0;
3705
assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3706
3707
if (base != NULL) {
3708
unsigned int base_h = ((unsigned long)base)>>32;
3709
unsigned int base_l = (unsigned int)((unsigned long)base);
3710
if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3711
addbase_size += 6; /* aih */
3712
} else if ((base_h == 0) && (base_l != 0)) {
3713
addbase_size += 6; /* algfi */
3714
} else {
3715
addbase_size += load_const_size();
3716
addbase_size += 4; /* algr */
3717
}
3718
}
3719
#ifdef ASSERT
3720
addbase_size += 10;
3721
addbase_size += 2; // Extra sigill.
3722
#endif
3723
return addbase_size + shift_size;
3724
}
3725
3726
// !!! If the instructions that get generated here change
3727
// then function instr_size_for_decode_klass_not_null()
3728
// needs to get updated.
3729
// This variant of decode_klass_not_null() must generate predictable code!
3730
// The code must only depend on globally known parameters.
3731
void MacroAssembler::decode_klass_not_null(Register dst) {
3732
address base = CompressedKlassPointers::base();
3733
int shift = CompressedKlassPointers::shift();
3734
int beg_off = offset();
3735
assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3736
3737
BLOCK_COMMENT("cKlass decoder (const size) {");
3738
3739
if (shift != 0) { // Shift required?
3740
z_sllg(dst, dst, shift);
3741
}
3742
if (base != NULL) {
3743
unsigned int base_h = ((unsigned long)base)>>32;
3744
unsigned int base_l = (unsigned int)((unsigned long)base);
3745
if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3746
z_aih(dst, base_h); // Base has no set bits in lower half.
3747
} else if ((base_h == 0) && (base_l != 0)) {
3748
z_algfi(dst, base_l); // Base has no set bits in upper half.
3749
} else {
3750
load_const(Z_R0, base); // Base has set bits everywhere.
3751
z_algr(dst, Z_R0);
3752
}
3753
}
3754
3755
#ifdef ASSERT
3756
Label ok;
3757
z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
3758
z_brc(Assembler::bcondAllZero, ok);
3759
// The plain disassembler does not recognize illtrap. It instead displays
3760
// a 32-bit value. Issuing two illtraps ensures that the disassembler finds
3761
// the proper beginning of the next instruction.
3762
z_illtrap(0xd1);
3763
z_illtrap(0xd1);
3764
bind(ok);
3765
#endif
3766
assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch.");
3767
3768
BLOCK_COMMENT("} cKlass decoder (const size)");
3769
}
3770
3771
// This variant of decode_klass_not_null() is for cases where
3772
// 1) the size of the generated instructions may vary
3773
// 2) the result is (potentially) stored in a register different from the source.
3774
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
3775
address base = CompressedKlassPointers::base();
3776
int shift = CompressedKlassPointers::shift();
3777
assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3778
3779
BLOCK_COMMENT("cKlass decoder {");
3780
3781
if (src == noreg) src = dst;
3782
3783
if (shift != 0) { // Shift or at least move required?
3784
z_sllg(dst, src, shift);
3785
} else {
3786
lgr_if_needed(dst, src);
3787
}
3788
3789
if (base != NULL) {
3790
unsigned int base_h = ((unsigned long)base)>>32;
3791
unsigned int base_l = (unsigned int)((unsigned long)base);
3792
if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3793
z_aih(dst, base_h); // Base has no set bits in lower half.
3794
} else if ((base_h == 0) && (base_l != 0)) {
3795
z_algfi(dst, base_l); // Base has no set bits in upper half.
3796
} else {
3797
load_const_optimized(Z_R0, base); // Base has set bits everywhere.
3798
z_algr(dst, Z_R0);
3799
}
3800
}
3801
3802
#ifdef ASSERT
3803
Label ok;
3804
z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment.
3805
z_brc(Assembler::bcondAllZero, ok);
3806
// The plain disassembler does not recognize illtrap. It instead displays
3807
// a 32-bit value. Issueing two illtraps assures the disassembler finds
3808
// the proper beginning of the next instruction.
3809
z_illtrap(0xd2);
3810
z_illtrap(0xd2);
3811
bind(ok);
3812
#endif
3813
BLOCK_COMMENT("} cKlass decoder");
3814
}
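// Both decoder variants implement the same formula (illustrative, not emitted code):
//   klass = ((uint64_t)narrow_klass << shift) + (uint64_t)CompressedKlassPointers::base();
// They differ only in how the base addition is strength-reduced (aih / algfi / algr).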
3815
3816
void MacroAssembler::load_klass(Register klass, Address mem) {
3817
if (UseCompressedClassPointers) {
3818
z_llgf(klass, mem);
3819
// Attention: no null check here!
3820
decode_klass_not_null(klass);
3821
} else {
3822
z_lg(klass, mem);
3823
}
3824
}
3825
3826
void MacroAssembler::load_klass(Register klass, Register src_oop) {
3827
if (UseCompressedClassPointers) {
3828
z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3829
// Attention: no null check here!
3830
decode_klass_not_null(klass);
3831
} else {
3832
z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop);
3833
}
3834
}
3835
3836
void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) {
3837
assert_different_registers(Rheader, Rsrc_oop);
3838
load_klass(Rheader, Rsrc_oop);
3839
z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset()));
3840
}
3841
3842
void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) {
3843
if (UseCompressedClassPointers) {
3844
assert_different_registers(dst_oop, klass, Z_R0);
3845
if (ck == noreg) ck = klass;
3846
encode_klass_not_null(ck, klass);
3847
z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3848
} else {
3849
z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
3850
}
3851
}
3852
3853
void MacroAssembler::store_klass_gap(Register s, Register d) {
3854
if (UseCompressedClassPointers) {
3855
assert(s != d, "not enough registers");
3856
// Support s = noreg.
3857
if (s != noreg) {
3858
z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes()));
3859
} else {
3860
z_mvhi(Address(d, oopDesc::klass_gap_offset_in_bytes()), 0);
3861
}
3862
}
3863
}
3864
3865
// Compare klass ptr in memory against klass ptr in register.
3866
//
3867
// Rop1 - klass in register, always uncompressed.
3868
// disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag.
3869
// Rbase - Base address of cKlass in memory.
3870
// maybeNULL - True if Rop1 possibly is a NULL.
3871
void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) {
3872
3873
BLOCK_COMMENT("compare klass ptr {");
3874
3875
if (UseCompressedClassPointers) {
3876
const int shift = CompressedKlassPointers::shift();
3877
address base = CompressedKlassPointers::base();
3878
3879
assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift");
3880
assert_different_registers(Rop1, Z_R0);
3881
assert_different_registers(Rop1, Rbase, Z_R1);
3882
3883
// First encode register oop and then compare with cOop in memory.
3884
// This sequence saves an unnecessary cOop load and decode.
3885
if (base == NULL) {
3886
if (shift == 0) {
3887
z_cl(Rop1, disp, Rbase); // Unscaled
3888
} else {
3889
z_srlg(Z_R0, Rop1, shift); // ZeroBased
3890
z_cl(Z_R0, disp, Rbase);
3891
}
3892
} else { // HeapBased
3893
#ifdef ASSERT
3894
bool used_R0 = true;
3895
bool used_R1 = true;
3896
#endif
3897
Register current = Rop1;
3898
Label done;
3899
3900
if (maybeNULL) { // NULL ptr must be preserved!
3901
z_ltgr(Z_R0, current);
3902
z_bre(done);
3903
current = Z_R0;
3904
}
3905
3906
unsigned int base_h = ((unsigned long)base)>>32;
3907
unsigned int base_l = (unsigned int)((unsigned long)base);
3908
if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) {
3909
lgr_if_needed(Z_R0, current);
3910
z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half.
3911
} else if ((base_h == 0) && (base_l != 0)) {
3912
lgr_if_needed(Z_R0, current);
3913
z_agfi(Z_R0, -(int)base_l);
3914
} else {
3915
int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
3916
add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement.
3917
}
3918
3919
if (shift != 0) {
3920
z_srlg(Z_R0, Z_R0, shift);
3921
}
3922
bind(done);
3923
z_cl(Z_R0, disp, Rbase);
3924
#ifdef ASSERT
3925
if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
3926
if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
3927
#endif
3928
}
3929
} else {
3930
z_clg(Rop1, disp, Z_R0, Rbase);
3931
}
3932
BLOCK_COMMENT("} compare klass ptr");
3933
}
3934
3935
//---------------------------
3936
// Compressed oops
3937
//---------------------------
3938
3939
void MacroAssembler::encode_heap_oop(Register oop) {
3940
oop_encoder(oop, oop, true /*maybe null*/);
3941
}
3942
3943
void MacroAssembler::encode_heap_oop_not_null(Register oop) {
3944
oop_encoder(oop, oop, false /*not null*/);
3945
}
3946
3947
// Called with something derived from the oop base, e.g. oop_base>>3.
3948
int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) {
3949
unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff;
3950
unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff;
3951
unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff;
3952
unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff;
3953
unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1)
3954
+ (oop_base_lh == 0 ? 0:1)
3955
+ (oop_base_hl == 0 ? 0:1)
3956
+ (oop_base_hh == 0 ? 0:1);
3957
3958
assert(oop_base != 0, "This is for HeapBased cOops only");
3959
3960
if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2.
3961
uint64_t pow2_offset = 0x10000 - oop_base_ll;
3962
if (pow2_offset < 0x8000) { // This might not be necessary.
3963
uint64_t oop_base2 = oop_base + pow2_offset;
3964
3965
oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff;
3966
oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff;
3967
oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff;
3968
oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff;
3969
n_notzero_parts = (oop_base_ll == 0 ? 0:1) +
3970
(oop_base_lh == 0 ? 0:1) +
3971
(oop_base_hl == 0 ? 0:1) +
3972
(oop_base_hh == 0 ? 0:1);
3973
if (n_notzero_parts == 1) {
3974
assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register");
3975
return -pow2_offset;
3976
}
3977
}
3978
}
3979
return 0;
3980
}
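// Worked example (hypothetical base): oop_base = 0x00000007FFFFF000
//   16-bit parts: hh = 0x0000, hl = 0x0007, lh = 0xFFFF, ll = 0xF000  -> 3 non-zero parts
//   pow2_offset = 0x10000 - 0xF000 = 0x1000 (< 0x8000)
//   oop_base + pow2_offset = 0x0000000800000000                       -> 1 non-zero part
//   => return -0x1000: the caller loads the cheap-to-materialize value 0x0000000800000000
//      into the base register and folds the small remainder into a later displacement.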
3981
3982
// If base address is offset from a straight power of two by just a few pages,
3983
// return this offset to the caller for a possible later composite add.
3984
// TODO/FIX: will only work correctly for 4k pages.
3985
int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) {
3986
int pow2_offset = get_oop_base_pow2_offset(oop_base);
3987
3988
load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible.
3989
3990
return pow2_offset;
3991
}
3992
3993
int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) {
3994
int offset = get_oop_base(Rbase, oop_base);
3995
z_lcgr(Rbase, Rbase);
3996
return -offset;
3997
}
3998
3999
// Compare compressed oop in memory against oop in register.
4000
// Rop1 - Oop in register.
4001
// disp - Offset of cOop in memory.
4002
// Rbase - Base address of cOop in memory.
4003
// maybeNULL - True if Rop1 possibly is a NULL.
4004
// maybeNULLtarget - Branch target for Rop1 == NULL, if flow control shall NOT continue with compare instruction.
4005
void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) {
4006
Register Rbase = mem.baseOrR0();
4007
Register Rindex = mem.indexOrR0();
4008
int64_t disp = mem.disp();
4009
4010
const int shift = CompressedOops::shift();
4011
address base = CompressedOops::base();
4012
4013
assert(UseCompressedOops, "must be on to call this method");
4014
assert(Universe::heap() != NULL, "java heap must be initialized to call this method");
4015
assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
4016
assert_different_registers(Rop1, Z_R0);
4017
assert_different_registers(Rop1, Rbase, Z_R1);
4018
assert_different_registers(Rop1, Rindex, Z_R1);
4019
4020
BLOCK_COMMENT("compare heap oop {");
4021
4022
// First encode register oop and then compare with cOop in memory.
4023
// This sequence saves an unnecessary cOop load and decode.
4024
if (base == NULL) {
4025
if (shift == 0) {
4026
z_cl(Rop1, disp, Rindex, Rbase); // Unscaled
4027
} else {
4028
z_srlg(Z_R0, Rop1, shift); // ZeroBased
4029
z_cl(Z_R0, disp, Rindex, Rbase);
4030
}
4031
} else { // HeapBased
4032
#ifdef ASSERT
4033
bool used_R0 = true;
4034
bool used_R1 = true;
4035
#endif
4036
Label done;
4037
int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base));
4038
4039
if (maybeNULL) { // NULL ptr must be preserved!
4040
z_ltgr(Z_R0, Rop1);
4041
z_bre(done);
4042
}
4043
4044
add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1);
4045
z_srlg(Z_R0, Z_R0, shift);
4046
4047
bind(done);
4048
z_cl(Z_R0, disp, Rindex, Rbase);
4049
#ifdef ASSERT
4050
if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2);
4051
if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2);
4052
#endif
4053
}
4054
BLOCK_COMMENT("} compare heap oop");
4055
}
4056
4057
void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
4058
const Address& addr, Register val,
4059
Register tmp1, Register tmp2, Register tmp3) {
4060
assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4061
ON_UNKNOWN_OOP_REF)) == 0, "unsupported decorator");
4062
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4063
decorators = AccessInternal::decorator_fixup(decorators);
4064
bool as_raw = (decorators & AS_RAW) != 0;
4065
if (as_raw) {
4066
bs->BarrierSetAssembler::store_at(this, decorators, type,
4067
addr, val,
4068
tmp1, tmp2, tmp3);
4069
} else {
4070
bs->store_at(this, decorators, type,
4071
addr, val,
4072
tmp1, tmp2, tmp3);
4073
}
4074
}
4075
4076
void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
4077
const Address& addr, Register dst,
4078
Register tmp1, Register tmp2, Label *is_null) {
4079
assert((decorators & ~(AS_RAW | IN_HEAP | IN_NATIVE | IS_ARRAY | IS_NOT_NULL |
4080
ON_PHANTOM_OOP_REF | ON_WEAK_OOP_REF)) == 0, "unsupported decorator");
4081
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4082
decorators = AccessInternal::decorator_fixup(decorators);
4083
bool as_raw = (decorators & AS_RAW) != 0;
4084
if (as_raw) {
4085
bs->BarrierSetAssembler::load_at(this, decorators, type,
4086
addr, dst,
4087
tmp1, tmp2, is_null);
4088
} else {
4089
bs->load_at(this, decorators, type,
4090
addr, dst,
4091
tmp1, tmp2, is_null);
4092
}
4093
}
4094
4095
void MacroAssembler::load_heap_oop(Register dest, const Address &a,
4096
Register tmp1, Register tmp2,
4097
DecoratorSet decorators, Label *is_null) {
4098
access_load_at(T_OBJECT, IN_HEAP | decorators, a, dest, tmp1, tmp2, is_null);
4099
}
4100
4101
void MacroAssembler::store_heap_oop(Register Roop, const Address &a,
4102
Register tmp1, Register tmp2, Register tmp3,
4103
DecoratorSet decorators) {
4104
access_store_at(T_OBJECT, IN_HEAP | decorators, a, Roop, tmp1, tmp2, tmp3);
4105
}
4106
4107
//-------------------------------------------------
4108
// Encode compressed oop. Generally usable encoder.
4109
//-------------------------------------------------
4110
// Rsrc - contains regular oop on entry. It remains unchanged.
4111
// Rdst - contains compressed oop on exit.
4112
// Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged.
4113
//
4114
// Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality.
4115
// Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance.
4116
//
4117
// only32bitValid is set, if later code only uses the lower 32 bits. In this
4118
// case we must not fix the upper 32 bits.
4119
void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
4120
Register Rbase, int pow2_offset, bool only32bitValid) {
4121
4122
const address oop_base = CompressedOops::base();
4123
const int oop_shift = CompressedOops::shift();
4124
const bool disjoint = CompressedOops::base_disjoint();
4125
4126
assert(UseCompressedOops, "must be on to call this method");
4127
assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder");
4128
assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift");
4129
4130
if (disjoint || (oop_base == NULL)) {
4131
BLOCK_COMMENT("cOop encoder zeroBase {");
4132
if (oop_shift == 0) {
4133
if (oop_base != NULL && !only32bitValid) {
4134
z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again.
4135
} else {
4136
lgr_if_needed(Rdst, Rsrc);
4137
}
4138
} else {
4139
z_srlg(Rdst, Rsrc, oop_shift);
4140
if (oop_base != NULL && !only32bitValid) {
4141
z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4142
}
4143
}
4144
BLOCK_COMMENT("} cOop encoder zeroBase");
4145
return;
4146
}
4147
4148
bool used_R0 = false;
4149
bool used_R1 = false;
4150
4151
BLOCK_COMMENT("cOop encoder general {");
4152
assert_different_registers(Rdst, Z_R1);
4153
assert_different_registers(Rsrc, Rbase);
4154
if (maybeNULL) {
4155
Label done;
4156
// We reorder shifting and subtracting, so that we can compare
4157
// and shift in parallel:
4158
//
4159
// cycle 0: potential LoadN, base = <const>
4160
// cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0)
4161
// cycle 2: if (cr) br, dst = dst + base + offset
4162
4163
// Get oop_base components.
4164
if (pow2_offset == -1) {
4165
if (Rdst == Rbase) {
4166
if (Rdst == Z_R1 || Rsrc == Z_R1) {
4167
Rbase = Z_R0;
4168
used_R0 = true;
4169
} else {
4170
Rdst = Z_R1;
4171
used_R1 = true;
4172
}
4173
}
4174
if (Rbase == Z_R1) {
4175
used_R1 = true;
4176
}
4177
pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift);
4178
}
4179
assert_different_registers(Rdst, Rbase);
4180
4181
// Check for NULL oop (must be left alone) and shift.
4182
if (oop_shift != 0) { // Shift out alignment bits
4183
if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set.
4184
z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
4185
} else {
4186
z_srlg(Rdst, Rsrc, oop_shift);
4187
z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero.
4188
// This probably is faster, as it does not write a register. No!
4189
// z_cghi(Rsrc, 0);
4190
}
4191
} else {
4192
z_ltgr(Rdst, Rsrc); // Move NULL to result register.
4193
}
4194
z_bre(done);
4195
4196
// Subtract oop_base components.
4197
if ((Rdst == Z_R0) || (Rbase == Z_R0)) {
4198
z_algr(Rdst, Rbase);
4199
if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); }
4200
} else {
4201
add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst);
4202
}
4203
if (!only32bitValid) {
4204
z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4205
}
4206
bind(done);
4207
4208
} else { // not null
4209
// Get oop_base components.
4210
if (pow2_offset == -1) {
4211
pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base);
4212
}
4213
4214
// Subtract oop_base components and shift.
4215
if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) {
4216
// Don't use lay instruction.
4217
if (Rdst == Rsrc) {
4218
z_algr(Rdst, Rbase);
4219
} else {
4220
lgr_if_needed(Rdst, Rbase);
4221
z_algr(Rdst, Rsrc);
4222
}
4223
if (pow2_offset != 0) add2reg(Rdst, pow2_offset);
4224
} else {
4225
add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc);
4226
}
4227
if (oop_shift != 0) { // Shift out alignment bits.
4228
z_srlg(Rdst, Rdst, oop_shift);
4229
}
4230
if (!only32bitValid) {
4231
z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again.
4232
}
4233
}
4234
#ifdef ASSERT
4235
if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); }
4236
if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); }
4237
#endif
4238
BLOCK_COMMENT("} cOop encoder general");
4239
}
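// The general encoding above boils down to (illustrative, not emitted code):
//   narrow_oop = (oop == NULL) ? 0 : (uint32_t)(((uint64_t)oop - (uint64_t)base) >> shift);
// The subtraction is implemented as an addition of the 2's complement of the (possibly
// pre-shifted) base, so that the shift and the NULL check can be scheduled in parallel.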
4240
4241
//-------------------------------------------------
4242
// decode compressed oop. Generally usable decoder.
4243
//-------------------------------------------------
// Rsrc - contains compressed oop on entry.
// Rdst - contains regular oop on exit.
// Rdst and Rsrc may indicate same register.
// Rdst must not be the same register as Rbase, if Rbase was preloaded (before call).
// Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch.
// Rbase - register to use for the base
// pow2_offset - offset of base to nice value. If -1, base must be loaded.
// For performance, it is good to
// - avoid Z_R0 for any of the argument registers.
// - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance.
// - avoid Z_R1 for Rdst if Rdst == Rbase.
void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) {

  const address oop_base = CompressedOops::base();
  const int oop_shift = CompressedOops::shift();
  const bool disjoint = CompressedOops::base_disjoint();

  assert(UseCompressedOops, "must be on to call this method");
  assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder");
  assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes),
         "cOop encoder detected bad shift");

  // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary.

  if (oop_base != NULL) {
    unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff;
    unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff;
    unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff;
    if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) {
      BLOCK_COMMENT("cOop decoder disjointBase {");
      // We do not need to load the base. Instead, we can install the upper bits
      // with an OR instead of an ADD.
      Label done;

      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) { // NULL ptr must be preserved!
        z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
        z_bre(done);
      } else {
        z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone.
      }
      if ((oop_base_hl != 0) && (oop_base_hh != 0)) {
        z_oihf(Rdst, oop_base_hf);
      } else if (oop_base_hl != 0) {
        z_oihl(Rdst, oop_base_hl);
      } else {
        assert(oop_base_hh != 0, "not heapbased mode");
        z_oihh(Rdst, oop_base_hh);
      }
      bind(done);
      BLOCK_COMMENT("} cOop decoder disjointBase");
    } else {
      BLOCK_COMMENT("cOop decoder general {");
      // There are three decode steps:
      //   scale oop offset (shift left)
      //   get base (in reg) and pow2_offset (constant)
      //   add base, pow2_offset, and oop offset
      // The following register overlap situations may exist:
      // Rdst == Rsrc, Rbase any other
      //   not a problem. Scaling in-place leaves Rbase undisturbed.
      //   Loading Rbase does not impact the scaled offset.
      // Rdst == Rbase, Rsrc any other
      //   scaling would destroy a possibly preloaded Rbase. Loading Rbase
      //   would destroy the scaled offset.
      //   Remedy: use Rdst_tmp if Rbase has been preloaded.
      //           use Rbase_tmp if base has to be loaded.
      // Rsrc == Rbase, Rdst any other
      //   Only possible without preloaded Rbase.
      //   Loading Rbase does not destroy compressed oop because it was scaled into Rdst before.
      // Rsrc == Rbase, Rdst == Rbase
      //   Only possible without preloaded Rbase.
      //   Loading Rbase would destroy compressed oop. Scaling in-place is ok.
      //   Remedy: use Rbase_tmp.
      //
      Label done;
      Register Rdst_tmp = Rdst;
      Register Rbase_tmp = Rbase;
      bool used_R0 = false;
      bool used_R1 = false;
      bool base_preloaded = pow2_offset >= 0;
      guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller");
      assert(oop_shift != 0, "room for optimization");

      // Check if we need to use scratch registers.
      if (Rdst == Rbase) {
        assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg");
        if (Rdst != Rsrc) {
          if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
          else                { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; }
        } else {
          Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1;
        }
      }
      if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase);

      // Scale oop and check for NULL.
      // Rsrc contains a narrow oop. Thus we are sure the leftmost <oop_shift> bits will never be set.
      if (maybeNULL) { // NULL ptr must be preserved!
        z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code.
        z_bre(done);
      } else {
        z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone.
      }

      // Get oop_base components.
      if (!base_preloaded) {
        pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base);
      }

      // Add up all components.
      if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) {
        z_algr(Rdst_tmp, Rbase_tmp);
        if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); }
      } else {
        add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp);
      }

      bind(done);
      lgr_if_needed(Rdst, Rdst_tmp);
#ifdef ASSERT
      if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); }
      if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); }
#endif
      BLOCK_COMMENT("} cOop decoder general");
    }
  } else {
    BLOCK_COMMENT("cOop decoder zeroBase {");
    if (oop_shift == 0) {
      lgr_if_needed(Rdst, Rsrc);
    } else {
      z_sllg(Rdst, Rsrc, oop_shift);
    }
    BLOCK_COMMENT("} cOop decoder zeroBase");
  }
}
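// Editorial sketch (not part of the original file): a hypothetical call site for the decoder
// above. The register choices are made-up examples, not a fixed calling convention.
//
//   // Decode a narrow oop in place; Z_R1 is free, so the decoder may load the heap base itself.
//   // pow2_offset == -1 signals that Rbase has not been preloaded (see parameter comment above).
//   __ oop_decoder(Z_R2, Z_R2, /*maybeNULL=*/true, Z_R1, /*pow2_offset=*/-1);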
// ((OopHandle)result).resolve();
void MacroAssembler::resolve_oop_handle(Register result) {
  // OopHandle::resolve is an indirection.
  z_lg(result, 0, result);
}

void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
  mem2reg_opt(mirror, Address(const_method, ConstMethod::constants_offset()));
  mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
  mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
  resolve_oop_handle(mirror);
}

void MacroAssembler::load_method_holder(Register holder, Register method) {
  mem2reg_opt(holder, Address(method, Method::const_offset()));
  mem2reg_opt(holder, Address(holder, ConstMethod::constants_offset()));
  mem2reg_opt(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes()));
}

//---------------------------------------------------------------
//--- Operations on arrays.
//---------------------------------------------------------------

// Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1, and r5 are killed.
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) {

  int block_start = offset();
  Register dst_len = Z_R1;  // Holds dst len for MVCLE.
  Register dst_addr = Z_R0; // Holds dst addr for MVCLE.

  Label doXC, doMVCLE, done;

  BLOCK_COMMENT("Clear_Array {");

  // Check for zero len and convert to long.
  z_ltgfr(odd_tmp_reg, cnt_arg);
  z_bre(done); // Nothing to do if len == 0.

  // Prefetch data to be cleared.
  if (VM_Version::has_Prefetch()) {
    z_pfd(0x02, 0, Z_R0, base_pointer_arg);
    z_pfd(0x02, 256, Z_R0, base_pointer_arg);
  }

  z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear.
  z_cghi(odd_tmp_reg, 32);         // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doXC);                    // If so, use executed XC to clear.

  // MVCLE: initialize long arrays (general case).
  bind(doMVCLE);
  z_lgr(dst_addr, base_pointer_arg);
  // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
  // The even register of the register pair is not killed.
  clear_reg(odd_tmp_reg, true, false);
  MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0);
  z_bru(done);

  // XC: initialize short arrays.
  Label XC_template; // Instr template, never exec directly!
  bind(XC_template);
  z_xc(0,0,base_pointer_arg,0,base_pointer_arg);

  bind(doXC);
  add2reg(dst_len, -1);             // Get #bytes-1 for EXECUTE.
  if (VM_Version::has_ExecuteExtensions()) {
    z_exrl(dst_len, XC_template);   // Execute XC with var. len.
  } else {
    z_larl(odd_tmp_reg, XC_template);
    z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len.
  }
  // z_bru(done);      // fallthru

  bind(done);

  BLOCK_COMMENT("} Clear_Array");

  int block_end = offset();
  return block_end - block_start;
}

// Compiler ensures base is doubleword aligned and cnt is count of doublewords.
// Emitter does not KILL any arguments nor work registers.
// Emitter generates up to 16 XC instructions, depending on the array length.
unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
  int block_start = offset();
  int off;
  int lineSize_Bytes = AllocatePrefetchStepSize;
  int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord;
  bool doPrefetch = VM_Version::has_Prefetch();
  int XC_maxlen = 256;
  int numXCInstr = cnt > 0 ? (cnt*BytesPerWord-1)/XC_maxlen+1 : 0;

  BLOCK_COMMENT("Clear_Array_Const {");
  assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only");

  // Do less prefetching for very short arrays.
  if (numXCInstr > 0) {
    // Prefetch only some cache lines, then begin clearing.
    if (doPrefetch) {
      if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear,
        z_pfd(0x02, 0, Z_R0, base);               // prefetch just the first cache line.
      } else {
        assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines");
        for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) {
          z_pfd(0x02, off*lineSize_Bytes, Z_R0, base);
        }
      }
    }

    for (off=0; off<(numXCInstr-1); off++) {
      z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base);

      // Prefetch some cache lines in advance.
      if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) {
        z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base);
      }
    }
    if (off*XC_maxlen < cnt*BytesPerWord) {
      z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base);
    }
  }
  BLOCK_COMMENT("} Clear_Array_Const");

  int block_end = offset();
  return block_end - block_start;
}

// Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1, (which are work registers) and odd_tmp_reg are killed.
//
// For very large arrays, exploit MVCLE H/W support.
// MVCLE instruction automatically exploits H/W-optimized page mover.
// - Bytes up to next page boundary are cleared with a series of XC to self.
// - All full pages are cleared with the page mover H/W assist.
// - Remaining bytes are again cleared by a series of XC to self.
//
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) {

  int block_start = offset();
  Register dst_len = Z_R1;  // Holds dst len for MVCLE.
  Register dst_addr = Z_R0; // Holds dst addr for MVCLE.

  BLOCK_COMMENT("Clear_Array_Const_Big {");

  // Get len to clear.
  load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8

  // Prepare other args to MVCLE.
  z_lgr(dst_addr, base_pointer_arg);
  // Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
  // The even register of the register pair is not killed.
  (void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero.
  MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0);
  BLOCK_COMMENT("} Clear_Array_Const_Big");

  int block_end = offset();
  return block_end - block_start;
}

// Allocator.
unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
                                                           Register cnt_reg,
                                                           Register tmp1_reg, Register tmp2_reg) {
  // Tmp1 is oddReg.
  // Tmp2 is evenReg.

  int block_start = offset();
  Label doMVC, doMVCLE, done, MVC_template;

  BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {");

  // Check for zero len and convert to long.
  z_ltgfr(cnt_reg, cnt_reg); // Remember the cast value for the doSTG case.
  z_bre(done);               // Nothing to do if len == 0.

  z_sllg(Z_R1, cnt_reg, 3);  // Dst len in bytes. Calc early to have the result ready.

  z_cghi(cnt_reg, 32);       // Check for len <= 256 bytes (<=32 DW).
  z_brnh(doMVC);             // If so, use executed MVC to clear.

  bind(doMVCLE);             // A lot of data (more than 256 bytes).
  // Prep dest reg pair.
  z_lgr(Z_R0, dst_reg);      // dst addr
  // Dst len already in Z_R1.
  // Prep src reg pair.
  z_lgr(tmp2_reg, src_reg);  // src addr
  z_lgr(tmp1_reg, Z_R1);     // Src len same as dst len.

  // Do the copy.
  move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache.
  z_bru(done);                         // All done.

  bind(MVC_template);        // Just some data (not more than 256 bytes).
  z_mvc(0, 0, dst_reg, 0, src_reg);

  bind(doMVC);

  if (VM_Version::has_ExecuteExtensions()) {
    add2reg(Z_R1, -1);
  } else {
    add2reg(tmp1_reg, -1, Z_R1);
    z_larl(Z_R1, MVC_template);
  }

  if (VM_Version::has_Prefetch()) {
    z_pfd(1, 0,Z_R0,src_reg);
    z_pfd(2, 0,Z_R0,dst_reg);
    // z_pfd(1,256,Z_R0,src_reg);    // Assume very short copy.
    // z_pfd(2,256,Z_R0,dst_reg);
  }

  if (VM_Version::has_ExecuteExtensions()) {
    z_exrl(Z_R1, MVC_template);
  } else {
    z_ex(tmp1_reg, 0, Z_R0, Z_R1);
  }

  bind(done);

  BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint");

  int block_end = offset();
  return block_end - block_start;
}

//-------------------------------------------------
// Constants (scalar and oop) in constant pool
//-------------------------------------------------

// Add a non-relocated constant to the CP.
int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
  long value = val.value();
  address tocPos = long_constant(value);

  if (tocPos != NULL) {
    int tocOffset = (int)(tocPos - code()->consts()->start());
    return tocOffset;
  }
  // long_constant() returned NULL, so no constant entry has been created.
  // In that case, we return a "fatal" offset, just in case that subsequently
  // generated access code is executed.
  return -1;
}

// Returns the TOC offset where the address is stored.
// Add a relocated constant to the CP.
int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
  // Use RelocationHolder::none for the constant pool entry.
  // Otherwise we will end up with a failing NativeCall::verify(x),
  // where x is the address of the constant pool entry.
  address tocPos = address_constant((address)oop.value(), RelocationHolder::none);

  if (tocPos != NULL) {
    int tocOffset = (int)(tocPos - code()->consts()->start());
    RelocationHolder rsp = oop.rspec();
    Relocation *rel = rsp.reloc();

    // Store toc_offset in relocation, used by call_far_patchable.
    if ((relocInfo::relocType)rel->type() == relocInfo::runtime_call_w_cp_type) {
      ((runtime_call_w_cp_Relocation *)(rel))->set_constant_pool_offset(tocOffset);
    }
    // Relocate at the load's pc.
    relocate(rsp);

    return tocOffset;
  }
  // address_constant() returned NULL, so no constant entry has been created.
  // In that case, we return a "fatal" offset, just in case that subsequently
  // generated access code is executed.
  return -1;
}

bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
  int tocOffset = store_const_in_toc(a);
  if (tocOffset == -1) return false;
  address tocPos = tocOffset + code()->consts()->start();
  assert((address)code()->consts()->start() != NULL, "Please add CP address");
  relocate(a.rspec());
  load_long_pcrelative(dst, tocPos);
  return true;
}

bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
  int tocOffset = store_oop_in_toc(a);
  if (tocOffset == -1) return false;
  address tocPos = tocOffset + code()->consts()->start();
  assert((address)code()->consts()->start() != NULL, "Please add CP address");

  load_addr_pcrelative(dst, tocPos);
  return true;
}

// If the instruction sequence at the given pc is a load_const_from_toc
// sequence, return the value currently stored at the referenced position
// in the TOC.
intptr_t MacroAssembler::get_const_from_toc(address pc) {

  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");

  long offset = get_load_const_from_toc_offset(pc);
  address dataLoc = NULL;
  if (is_load_const_from_toc_pcrelative(pc)) {
    dataLoc = pc + offset;
  } else {
    CodeBlob* cb = CodeCache::find_blob_unsafe(pc); // Else we get assertion if nmethod is zombie.
    assert(cb && cb->is_nmethod(), "sanity");
    nmethod* nm = (nmethod*)cb;
    dataLoc = nm->ctable_begin() + offset;
  }
  return *(intptr_t *)dataLoc;
}

// If the instruction sequence at the given pc is a load_const_from_toc
// sequence, copy the passed-in new_data value into the referenced
// position in the TOC.
void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) {
  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");

  long offset = MacroAssembler::get_load_const_from_toc_offset(pc);
  address dataLoc = NULL;
  if (is_load_const_from_toc_pcrelative(pc)) {
    dataLoc = pc+offset;
  } else {
    nmethod* nm = CodeCache::find_nmethod(pc);
    assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob");
    dataLoc = nm->ctable_begin() + offset;
  }
  if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary.
    *(unsigned long *)dataLoc = new_data;
  }
}

// Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc
// site. Verify by calling is_load_const_from_toc() before!!
// Offset is +/- 2**32 -> use long.
long MacroAssembler::get_load_const_from_toc_offset(address a) {
  assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load");
  // expected code sequence:
  //   z_lgrl(t, simm32);    len = 6
  unsigned long inst;
  unsigned int len = get_instruction(a, &inst);
  return get_pcrel_offset(inst);
}

//**********************************************************************************
// inspection of generated instruction sequences for a particular pattern
//**********************************************************************************

bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) {
#ifdef ASSERT
  unsigned long inst;
  unsigned int len = get_instruction(a+2, &inst);
  if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) {
    const int range = 128;
    Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl");
    VM_Version::z_SIGSEGV();
  }
#endif
  // expected code sequence:
  //   z_lgrl(t, relAddr32);    len = 6
  //TODO: verify accessed data is in CP, if possible.
  return is_load_pcrelative_long(a); // TODO: might be too general. Currently, only lgrl is used.
}

bool MacroAssembler::is_load_const_from_toc_call(address a) {
  return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size());
}

bool MacroAssembler::is_load_const_call(address a) {
  return is_load_const(a) && is_call_byregister(a + load_const_size());
}

//-------------------------------------------------
// Emitters for some really CISC instructions
//-------------------------------------------------

void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) {
  assert(dst->encoding()%2==0, "must be an even/odd register pair");
  assert(src->encoding()%2==0, "must be an even/odd register pair");
  assert(pad<256, "must be a padding BYTE");

  Label retry;
  bind(retry);
  Assembler::z_mvcle(dst, src, pad);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}
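// Editorial note (assumption, not from the original file): MVCLE and the other interruptible
// string/crypto instructions in this section may stop before completion and report CC==3.
// Each wrapper therefore re-issues the same instruction until the condition code is no longer 3;
// the operand register pairs are updated by the hardware, so every retry resumes where the
// previous attempt stopped. In pseudo-C the pattern is roughly:
//
//   do {
//     cc = MVCLE(dst_even_odd_pair, src_even_odd_pair, pad);  // may be interrupted
//   } while (cc == 3);                                        // CC==3: not finished yet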

void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) {
  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(pad<256, "must be a padding BYTE");

  Label retry;
  bind(retry);
  Assembler::z_clcle(left, right, pad, Z_R0);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) {
  assert(left->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(right->encoding() % 2 == 0, "must be an even/odd register pair");
  assert(pad<=0xfff, "must be a padding HALFWORD");
  assert(VM_Version::has_ETF2(), "instruction must be available");

  Label retry;
  bind(retry);
  Assembler::z_clclu(left, right, pad, Z_R0);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::search_string(Register end, Register start) {
  assert(end->encoding() != 0, "end address must not be in R0");
  assert(start->encoding() != 0, "start address must not be in R0");

  Label retry;
  bind(retry);
  Assembler::z_srst(end, start);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::search_string_uni(Register end, Register start) {
  assert(end->encoding() != 0, "end address must not be in R0");
  assert(start->encoding() != 0, "start address must not be in R0");
  assert(VM_Version::has_ETF3(), "instruction must be available");

  Label retry;
  bind(retry);
  Assembler::z_srstu(end, start);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::kmac(Register srcBuff) {
  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kmac(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::kimd(Register srcBuff) {
  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kimd(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::klmd(Register srcBuff) {
  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
  assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_klmd(Z_R0, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::km(Register dstBuff, Register srcBuff) {
  // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
  // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_km(dstBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::kmc(Register dstBuff, Register srcBuff) {
  // DstBuff and srcBuff are allowed to be the same register (encryption in-place).
  // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block.
  assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0");
  assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register");
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_kmc(dstBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::cksm(Register crcBuff, Register srcBuff) {
  assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair");

  Label retry;
  bind(retry);
  Assembler::z_cksm(crcBuff, srcBuff);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_troo(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trot(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::translate_to(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trto(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) {
  assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair");
  assert((m3 & 0b1110) == 0, "Unused mask bits must be zero");

  Label retry;
  bind(retry);
  Assembler::z_trtt(r1, r2, m3);
  Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry);
}

//---------------------------------------
// Helpers for Intrinsic Emitters
//---------------------------------------

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) {
  assert_different_registers(crc, table, tmp);
  assert_different_registers(val, table);
  if (crc == val) {      // Must rotate first to use the unmodified value.
    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
  } else {
    z_srl(crc, 8);       // Unsigned shift, clear leftmost 8 bits.
    rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
  }
  z_x(crc, Address(table, tmp, 0));
}

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  fold_byte_crc32(crc, crc, table, tmp);
}

/**
 * Emits code to update CRC-32 with a byte value according to constants in table.
 *
 * @param [in,out]crc Register containing the crc.
 * @param [in]val     Register containing the byte to fold into the CRC.
 * @param [in]table   Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  z_xr(val, crc);
  fold_byte_crc32(crc, val, table, val);
}


/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register pointing to CRC table
 */
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
  assert_different_registers(crc, buf, len, table, data);

  Label L_mainLoop, L_done;
  const int mainLoop_stepping = 1;

  // Process all bytes in a single-byte loop.
  z_ltr(len, len);
  z_brnh(L_done);

  bind(L_mainLoop);
  z_llgc(data, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
  add2reg(buf, mainLoop_stepping);         // Advance buffer position.
  update_byte_crc32(crc, data, table);
  z_brct(len, L_mainLoop);                 // Iterate.

  bind(L_done);
}

/**
 * Emits code to update CRC-32 with a 4-byte value according to constants in table.
 * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c.
 *
 */
void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                                        Register t0, Register t1, Register t2, Register t3) {
  // This is what we implement (the DOBIG4 part):
  //
  // #define DOBIG4 c ^= *++buf4; \
  //         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
  //             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
  // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
  // Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
  const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
  const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
  const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
  const int ix3 = 7*(4*CRC32_COLUMN_SIZE);

  // XOR crc with next four bytes of buffer.
  lgr_if_needed(t0, crc);
  z_x(t0, Address(buf, bufDisp));
  if (bufInc != 0) {
    add2reg(buf, bufInc);
  }

  // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices.
  rotate_then_insert(t3, t0, 56-2, 63-2, 2, true);    // ((c >> 0)  & 0xff) << 2
  rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true);  // ((c >> 8)  & 0xff) << 2
  rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
  rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2

  // XOR indexed table values to calculate updated crc.
  z_ly(t2, Address(table, t2, (intptr_t)ix1));
  z_ly(t0, Address(table, t0, (intptr_t)ix3));
  z_xy(t2, Address(table, t3, (intptr_t)ix0));
  z_xy(t0, Address(table, t1, (intptr_t)ix2));
  z_xr(t0, t2);           // Now t0 contains the updated CRC value.
  lgr_if_needed(crc, t0);
}
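// Editorial note (assumption based on the code above): each CRC table column holds 4-byte
// entries, so every extracted byte is shifted left by 2 to turn a table index into a byte
// offset. In plain C, one column lookup of the DOBIG4 step would look roughly like:
//
//   uint32_t entry = *(uint32_t*)((char*)table + ix1 + (((c >> 8) & 0xff) << 2));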

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register pointing to CRC table
 *
 * Uses Z_R10..Z_R13 as work registers. Must be saved/restored by caller!
 */
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);

  Label L_mainLoop, L_tail;
  Register data = t0;
  Register ctr = Z_R0;
  const int mainLoop_stepping = 4;
  const int log_stepping = exact_log2(mainLoop_stepping);

  // Don't test for len <= 0 here. This pathological case should not occur anyway.
  // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
  // The situation itself is detected and handled correctly by the conditional branches
  // following aghi(len, -stepping) and aghi(len, +stepping).

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }

  // Check for short (<4 bytes) buffer.
  z_srag(ctr, len, log_stepping);
  z_brnh(L_tail);

  z_lrvr(crc, crc); // Reverse byte order because we are dealing with big-endian data.
  rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop

  BIND(L_mainLoop);
  update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
  z_brct(ctr, L_mainLoop); // Iterate.

  z_lrvr(crc, crc); // Reverse byte order back to original.

  // Process last few (<8) bytes of buffer.
  BIND(L_tail);
  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }
}

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register pointing to CRC table
 */
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
                                        Register t0, Register t1, Register t2, Register t3,
                                        bool invertCRC) {
  assert_different_registers(crc, buf, len, table);
  Register data = t0;

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }

  update_byteLoop_crc32(crc, buf, len, table, data);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }
}

void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
                                             bool invertCRC) {
  assert_different_registers(crc, buf, len, table, tmp);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }

  z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
  update_byte_crc32(crc, tmp, table);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }
}

void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                                bool invertCRC) {
  assert_different_registers(crc, val, table);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }

  update_byte_crc32(crc, val, table);

  if (invertCRC) {
    not_(crc, noreg, false); // 1s complement of crc
  }
}

//
// Code for BigInteger::multiplyToLen() intrinsic.
//

// dest_lo += src1 + src2
// dest_hi += carry1 + carry2
// Z_R7 is destroyed !
void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo,
                                     Register src1, Register src2) {
  clear_reg(Z_R7);
  z_algr(dest_lo, src1);
  z_alcgr(dest_hi, Z_R7);
  z_algr(dest_lo, src2);
  z_alcgr(dest_hi, Z_R7);
}
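// Editorial sketch (not from the original file): in 128-bit arithmetic, add2_with_carry is
// roughly equivalent to
//
//   unsigned __int128 acc = ((unsigned __int128)dest_hi << 64) | dest_lo;
//   acc += src1;                       // first ALGR/ALCGR pair: add, then fold carry into dest_hi
//   acc += src2;                       // second ALGR/ALCGR pair
//   dest_hi = (uint64_t)(acc >> 64);
//   dest_lo = (uint64_t)acc;
//
// where src1 and src2 are treated as zero-extended 64-bit values.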

// Multiply 64 bit by 64 bit first loop.
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart,
                                           Register x_xstart,
                                           Register y, Register y_idx,
                                           Register z,
                                           Register carry,
                                           Register product,
                                           Register idx, Register kdx) {
  // jlong carry, x[], y[], z[];
  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //   huge_128 product = y[idx] * x[xstart] + carry;
  //   z[kdx] = (jlong)product;
  //   carry  = (jlong)(product >>> 64);
  // }
  // z[xstart] = carry;

  Label L_first_loop, L_first_loop_exit;
  Label L_one_x, L_one_y, L_multiply;

  z_aghi(xstart, -1);
  z_brl(L_one_x);   // Special case: length of x is 1.

  // Load next two integers of x.
  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  bind(L_first_loop);

  z_aghi(idx, -1);
  z_brl(L_first_loop_exit);
  z_aghi(idx, -1);
  z_brl(L_one_y);

  // Load next two integers of y.
  z_sllg(Z_R1_scratch, idx, LogBytesPerInt);
  mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0));


  bind(L_multiply);

  Register multiplicand = product->successor();
  Register product_low = multiplicand;

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, y_idx);     // multiplicand * y_idx -> product::multiplicand
  clear_reg(Z_R7);
  z_algr(product_low, carry); // Add carry to result.
  z_alcgr(product, Z_R7);     // Add carry of the last addition.
  add2reg(kdx, -2);

  // Store result.
  z_sllg(Z_R7, kdx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, 0));
  lgr_if_needed(carry, product);
  z_bru(L_first_loop);


  bind(L_one_y); // Load one 32 bit portion of y as (0,value).

  clear_reg(y_idx);
  mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false);
  z_bru(L_multiply);


  bind(L_one_x); // Load one 32 bit portion of x as (0,value).

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
  z_bru(L_first_loop);

  bind(L_first_loop_exit);
}

// Multiply 64 bit by 64 bit and add 128 bit.
void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
                                            Register z,
                                            Register yz_idx, Register idx,
                                            Register carry, Register product,
                                            int offset) {
  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
  // z[kdx] = (jlong)product;

  Register multiplicand = product->successor();
  Register product_low = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);
  mem2reg_opt(yz_idx, Address(y, Z_R7, offset));

  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
  mem2reg_opt(yz_idx, Address(z, Z_R7, offset));

  add2_with_carry(product, product_low, carry, yz_idx);

  z_sllg(Z_R7, idx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, offset));

}

// Multiply 128 bit by 128 bit. Unrolled inner loop.
void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
                                             Register y, Register z,
                                             Register yz_idx, Register idx,
                                             Register jdx,
                                             Register carry, Register product,
                                             Register carry2) {
  // jlong carry, x[], y[], z[];
  // int kdx = ystart+1;
  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
  //   z[kdx+idx+1] = (jlong)product;
  //   jlong carry2 = (jlong)(product >>> 64);
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }
  // idx += 2;
  // if (idx > 0) {
  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
  //   z[kdx+idx] = (jlong)product;
  //   carry = (jlong)(product >>> 64);
  // }

  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;

  // scale the index
  lgr_if_needed(jdx, idx);
  and_imm(jdx, 0xfffffffffffffffcL);
  rshift(jdx, 2);


  bind(L_third_loop);

  z_aghi(jdx, -1);
  z_brl(L_third_loop_exit);
  add2reg(idx, -4);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
  lgr_if_needed(carry2, product);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
  lgr_if_needed(carry, product);
  z_bru(L_third_loop);


  bind(L_third_loop_exit); // Handle any left-over operand parts.

  and_imm(idx, 0x3);
  z_brz(L_post_third_loop_done);

  Label L_check_1;

  z_aghi(idx, -2);
  z_brl(L_check_1);

  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
  lgr_if_needed(carry, product);


  bind(L_check_1);

  add2reg(idx, 0x2);
  and_imm(idx, 0x1);
  z_aghi(idx, -1);
  z_brl(L_post_third_loop_done);

  Register multiplicand = product->successor();
  Register product_low = multiplicand;

  z_sllg(Z_R7, idx, LogBytesPerInt);
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
  lgr_if_needed(multiplicand, x_xstart);
  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
  clear_reg(yz_idx);
  mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);

  add2_with_carry(product, product_low, yz_idx, carry);

  z_sllg(Z_R7, idx, LogBytesPerInt);
  reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
  rshift(product_low, 32);

  lshift(product, 32);
  z_ogr(product_low, product);
  lgr_if_needed(carry, product_low);

  bind(L_post_third_loop_done);
}

void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z,
                                     Register tmp1, Register tmp2,
                                     Register tmp3, Register tmp4,
                                     Register tmp5) {
  ShortBranchVerifier sbv(this);

  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
  assert_different_registers(x, xlen, y, ylen, z,
                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);

  z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);

  // In OpenJDK, we store the argument as a 32-bit value in the slot.
  Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian.

  const Register idx = tmp1;
  const Register kdx = tmp2;
  const Register xstart = tmp3;

  const Register y_idx = tmp4;
  const Register carry = tmp5;
  const Register product = Z_R0_scratch;
  const Register x_xstart = Z_R8;

  // First Loop.
  //
  //   final static long LONG_MASK = 0xffffffffL;
  //   int xstart = xlen - 1;
  //   int ystart = ylen - 1;
  //   long carry = 0;
  //   for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
  //     long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
  //     z[kdx] = (int)product;
  //     carry = product >>> 32;
  //   }
  //   z[xstart] = (int)carry;
  //

  lgr_if_needed(idx, ylen); // idx = ylen
  z_llgf(kdx, zlen);        // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
  clear_reg(carry);         // carry = 0

  Label L_done;

  lgr_if_needed(xstart, xlen);
  z_aghi(xstart, -1);
  z_brl(L_done);

  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);

  NearLabel L_second_loop;
  compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);

  NearLabel L_carry;
  z_aghi(kdx, -1);
  z_brz(L_carry);

  // Store lower 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
  rshift(carry, 32);
  z_aghi(kdx, -1);


  bind(L_carry);

  // Store upper 32 bits of carry.
  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);

  // Second and third (nested) loops.
  //
  //   for (int i = xstart-1; i >= 0; i--) { // Second loop
  //     carry = 0;
  //     for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
  //       long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
  //                      (z[k] & LONG_MASK) + carry;
  //       z[k] = (int)product;
  //       carry = product >>> 32;
  //     }
  //     z[i] = (int)carry;
  //   }
  //
  //  i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx

  const Register jdx = tmp1;

  bind(L_second_loop);

  clear_reg(carry);           // carry = 0;
  lgr_if_needed(jdx, ylen);   // j = ystart+1

  z_aghi(xstart, -1);         // i = xstart-1;
  z_brl(L_done);

  // Use free slots in the current stackframe instead of push/pop.
  Address zsave(Z_SP, _z_abi(carg_1));
  reg2mem_opt(z, zsave);


  Label L_last_x;

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
  z_aghi(xstart, -1);                           // i = xstart-1;
  z_brl(L_last_x);

  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));


  Label L_third_loop_prologue;

  bind(L_third_loop_prologue);

  Address xsave(Z_SP, _z_abi(carg_2));
  Address xlensave(Z_SP, _z_abi(carg_3));
  Address ylensave(Z_SP, _z_abi(carg_4));

  reg2mem_opt(x, xsave);
  reg2mem_opt(xstart, xlensave);
  reg2mem_opt(ylen, ylensave);


  multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);

  mem2reg_opt(z, zsave);
  mem2reg_opt(x, xsave);
  mem2reg_opt(xlen, xlensave);   // This is the decrement of the loop counter!
  mem2reg_opt(ylen, ylensave);

  add2reg(tmp3, 1, xlen);
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
  z_aghi(tmp3, -1);
  z_brl(L_done);

  rshift(carry, 32);
  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
  z_bru(L_second_loop);

  // Next infrequent code is moved outside loops.
  bind(L_last_x);

  clear_reg(x_xstart);
  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
  z_bru(L_third_loop_prologue);

  bind(L_done);

  z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
}

#ifndef PRODUCT
// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false).
void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
  Label ok;
  if (check_equal) {
    z_bre(ok);
  } else {
    z_brne(ok);
  }
  stop(msg, id);
  bind(ok);
}

// Assert if CC indicates "low".
void MacroAssembler::asm_assert_low(const char *msg, int id) {
  Label ok;
  z_brnl(ok);
  stop(msg, id);
  bind(ok);
}

// Assert if CC indicates "high".
void MacroAssembler::asm_assert_high(const char *msg, int id) {
  Label ok;
  z_brnh(ok);
  stop(msg, id);
  bind(ok);
}

// Assert if CC indicates "not equal" (check_equal==true) or "equal" (check_equal==false)
// generate non-relocatable code.
void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
  Label ok;
  if (check_equal) { z_bre(ok); }
  else             { z_brne(ok); }
  stop_static(msg, id);
  bind(ok);
}

void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
                                          Register mem_base, const char* msg, int id) {
  switch (size) {
    case 4:
      load_and_test_int(Z_R0, Address(mem_base, mem_offset));
      break;
    case 8:
      load_and_test_long(Z_R0, Address(mem_base, mem_offset));
      break;
    default:
      ShouldNotReachHere();
  }
  if (allow_relocation) { asm_assert(check_equal, msg, id); }
  else                  { asm_assert_static(check_equal, msg, id); }
}

// Check the condition
//   expected_size == FP - SP
// after transformation:
//   expected_size - FP + SP == 0
// Destroys Register expected_size if no tmp register is passed.
void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
  if (tmp == noreg) {
    tmp = expected_size;
  } else {
    if (tmp != expected_size) {
      z_lgr(tmp, expected_size);
    }
    z_algr(tmp, Z_SP);
    z_slg(tmp, 0, Z_R0, Z_SP);
    asm_assert_eq(msg, id);
  }
}
#endif // !PRODUCT

void MacroAssembler::verify_thread() {
  if (VerifyThread) {
    unimplemented("", 117);
  }
}

// Save and restore functions: Exclude Z_R0.
void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) {
  z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord;
  if (include_fp) {
    z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord;
    z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord;
  }
  if (include_flags) {
    Label done;
    z_mvi(Address(dst, offset), 2); // encoding: equal
    z_bre(done);
    z_mvi(Address(dst, offset), 4); // encoding: higher
    z_brh(done);
    z_mvi(Address(dst, offset), 1); // encoding: lower
    bind(done);
  }
}
void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) {
  z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord;
  if (include_fp) {
    z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F5, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord;
    z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord;
  }
  if (include_flags) {
    z_cli(Address(src, offset), 2); // see encoding above
  }
}

// Plausibility check for oops.
void MacroAssembler::verify_oop(Register oop, const char* msg) {
  if (!VerifyOops) return;

  BLOCK_COMMENT("verify_oop {");
  unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord;
  address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();

  save_return_pc();

  // Push frame, but preserve flags
  z_lgr(Z_R0, Z_SP);
  z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP);
  z_stg(Z_R0, _z_abi(callers_sp), Z_SP);

  save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);

  lgr_if_needed(Z_ARG2, oop);
  load_const_optimized(Z_ARG1, (address)msg);
  load_const_optimized(Z_R1, entry_addr);
  z_lg(Z_R1, 0, Z_R1);
  call_c(Z_R1);

  restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true);
  pop_frame();
  restore_return_pc();

  BLOCK_COMMENT("} verify_oop ");
}

void MacroAssembler::verify_oop_addr(Address addr, const char* msg) {
  if (!VerifyOops) return;

  BLOCK_COMMENT("verify_oop {");
  unsigned int nbytes_save = (5 + 8) * BytesPerWord;
  address entry_addr = StubRoutines::verify_oop_subroutine_entry_address();

  save_return_pc();
  unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0
  save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);

  z_lg(Z_ARG2, addr.plus_disp(frame_size));
  load_const_optimized(Z_ARG1, (address)msg);
  load_const_optimized(Z_R1, entry_addr);
  z_lg(Z_R1, 0, Z_R1);
  call_c(Z_R1);

  restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false);
  pop_frame();
  restore_return_pc();

  BLOCK_COMMENT("} verify_oop ");
}

const char* MacroAssembler::stop_types[] = {
  "stop",
  "untested",
  "unimplemented",
  "shouldnotreachhere"
};

static void stop_on_request(const char* tp, const char* msg) {
  tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
  guarantee(false, "Z assembly code requires stop: %s", msg);
}

void MacroAssembler::stop(int type, const char* msg, int id) {
  BLOCK_COMMENT(err_msg("stop: %s {", msg));

  // Setup arguments.
  load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
  load_const(Z_ARG2, (void*) msg);
  get_PC(Z_R14);     // Following code pushes a frame without entering a new function. Use current pc as return address.
  save_return_pc();  // Saves return pc Z_R14.
  push_frame_abi160(0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
  // The plain disassembler does not recognize illtrap. It instead displays
  // a 32-bit value. Issuing two illtraps assures the disassembler finds
  // the proper beginning of the next instruction.
  z_illtrap(); // Illegal instruction.
  z_illtrap(); // Illegal instruction.

  BLOCK_COMMENT(" } stop");
}

// Special version of stop() for code size reduction.
// Reuses the previously generated call sequence, if any.
// Generates the call sequence on its own, if necessary.
// Note: This code will work only in non-relocatable code!
//       The relative address of the data elements (arg1, arg2) must not change.
//       The reentry point must not move relative to its users. This prerequisite
//       should be given for "hand-written" code, if all chain calls are in the same code blob.
//       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
  BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));

  // Setup arguments.
  if (allow_relocation) {
    // Relocatable version (for comparison purposes). Remove after some time.
    load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
    load_const(Z_ARG2, (void*) msg);
  } else {
    load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
    load_absolute_address(Z_ARG2, (address)msg);
  }
  if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
    BLOCK_COMMENT("branch to reentry point:");
    z_brc(bcondAlways, reentry);
  } else {
    BLOCK_COMMENT("reentry point:");
    reentry = pc();      // Re-entry point for subsequent stop calls.
    save_return_pc();    // Saves return pc Z_R14.
    push_frame_abi160(0);
    if (allow_relocation) {
      reentry = NULL;    // Prevent reentry if code relocation is allowed.
      call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    } else {
      call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
    }
    z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
  }
  BLOCK_COMMENT(" } stop_chain");

  return reentry;
}

// Special version of stop() for code size reduction.
// Assumes constant relative addresses for data and runtime call.
void MacroAssembler::stop_static(int type, const char* msg, int id) {
  stop_chain(NULL, type, msg, id, false);
}

void MacroAssembler::stop_subroutine() {
  unimplemented("stop_subroutine", 710);
}

// Prints msg to stdout from within generated code.
void MacroAssembler::warn(const char* msg) {
  RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
  load_absolute_address(Z_R1, (address) warning);
  load_absolute_address(Z_ARG1, (address) msg);
  (void) call(Z_R1);
  RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
}

#ifndef PRODUCT

// Write pattern 0x0101010101010101 in region [low-before, high+after].
void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
  if (!ZapEmptyStackFields) return;
  BLOCK_COMMENT("zap memory region {");
  load_const_optimized(val, 0x0101010101010101);
  int size = before + after;
  if (low == high && size < 5 && size > 0) {
    int offset = -before*BytesPerWord;
    for (int i = 0; i < size; ++i) {
      z_stg(val, Address(low, offset));
      offset += (1*BytesPerWord);
    }
  } else {
    add2reg(addr, -before*BytesPerWord, low);
    if (after) {
#ifdef ASSERT
      jlong check = after * BytesPerWord;
      assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
#endif
      add2reg(high, after * BytesPerWord);
    }
    NearLabel loop;
    bind(loop);
    z_stg(val, Address(addr));
    add2reg(addr, 8);
    compare64_and_branch(addr, high, bcondNotHigh, loop);
    if (after) {
      add2reg(high, -after * BytesPerWord);
    }
  }
  BLOCK_COMMENT("} zap memory region");
}
#endif // !PRODUCT

SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
  _masm = masm;
  _masm->load_absolute_address(_rscratch, (address)flag_addr);
  _masm->load_and_test_int(_rscratch, Address(_rscratch));
  if (value) {
    _masm->z_brne(_label); // Skip if true, i.e. != 0.
  } else {
    _masm->z_bre(_label);  // Skip if false, i.e. == 0.
  }
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}