GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/cpu/s390/copy_s390.hpp
/*
 * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2020 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

// Major contributions by LS

#ifndef CPU_S390_COPY_S390_HPP
#define CPU_S390_COPY_S390_HPP

// Inline functions for memory copy and fill.

// HeapWordSize (the size of class HeapWord) is 8 Bytes (the size of a
// pointer variable), since we always run the _LP64 model. As a consequence,
// HeapWord* memory ranges are always assumed to be doubleword-aligned,
// having a size which is an integer multiple of HeapWordSize.
//
// Dealing only with doubleword-aligned doubleword units has important
// positive performance and data access consequences. Many of the move
// instructions perform particularly well under these circumstances.
// Data access is "doubleword-concurrent", except for MVC and XC.
// Furthermore, data access can be forced to be sequential (MVCL and MVCLE)
// by use of the special padding byte 0xb1, where required. For copying,
// we use padding byte 0xb0 to prevent the D-cache from being polluted.
//
// On z/Architecture, gcc optimizes memcpy into a series of MVC instructions.
// This is optimal, even if just one HeapWord is copied. However, MVC
// copying is not atomic, i.e. not "doubleword concurrent" by definition.
//
// If the -mmvcle compiler option is specified, memcpy translates into
// code such that the entire memory range is copied or preset with just
// one MVCLE instruction.
//
// *to = *from is transformed into a MVC instruction already with -O1.
// Thus, for atomic copy operations, (inline) assembler code is required
// to guarantee atomic data accesses.
//
// For large (len >= MVCLEThreshold) chunks of memory, we exploit
// special H/W support of z/Architecture:
// 1) copy short piece of memory to page-align address(es)
// 2) copy largest part (all contained full pages) of memory using mvcle instruction.
//    z/Architecture processors have special H/W support for page-aligned storage
//    where len is an int multiple of page size. In that case, up to 4 cache lines are
//    processed in parallel and L1 cache is not polluted.
// 3) copy the remaining piece of memory.
//
// Measurement classifications:
// very rare - <= 10.000 calls AND <= 1.000 usec elapsed
// rare      - <= 100.000 calls AND <= 10.000 usec elapsed
// some      - <= 1.000.000 calls AND <= 100.000 usec elapsed
// freq      - <= 10.000.000 calls AND <= 1.000.000 usec elapsed
// very freq - > 10.000.000 calls OR > 1.000.000 usec elapsed

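// A minimal illustrative sketch (assumption: plain C++ with a 4096-byte page;
// sketch_large_copy is not used anywhere below): the three-step strategy above,
// with memcpy standing in for the MVCLE-based bulk step implemented further down.
#if 0
static void sketch_large_copy(jbyte* to, const jbyte* from, size_t len) {
  const size_t page = 4096;
  // 1) copy a short piece to page-align the target address.
  size_t head = (page - ((size_t)to & (page - 1))) & (page - 1);
  if (head > len) { head = len; }
  memcpy(to, from, head);
  to += head; from += head; len -= head;
  // 2) copy all contained full pages (processed by H/W without polluting L1).
  size_t bulk = len & ~(page - 1);
  memcpy(to, from, bulk);
  to += bulk; from += bulk; len -= bulk;
  // 3) copy the remaining piece.
  memcpy(to, from, len);
}
#endif
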
#undef USE_INLINE_ASM

static void copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  if (from > to) {
    while (count-- > 0) {
      // Copy forwards
      *to++ = *from++;
    }
  } else {
    from += count - 1;
    to += count - 1;
    while (count-- > 0) {
      // Copy backwards
      *to-- = *from--;
    }
  }
}

static void copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
  if (from > to) {
    while (count-- > 0) {
      // Copy forwards
      *to++ = *from++;
    }
  } else {
    from += count - 1;
    to += count - 1;
    while (count-- > 0) {
      // Copy backwards
      *to-- = *from--;
    }
  }
}

static bool has_destructive_overlap(const char* from, char* to, size_t byte_count) {
  return (from < to) && ((to-from) < (ptrdiff_t)byte_count);
}

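// A minimal usage sketch (assumption: illustrative only, not called below): when
// the ranges overlap destructively, a plain forward element-wise copy would read
// bytes it has already overwritten, so the copy routines below either copy
// backwards or fall back to memmove in that case.
#if 0
static void sketch_overlap_aware_copy(char* to, const char* from, size_t byte_count) {
  if (has_destructive_overlap(from, to, byte_count)) {
    (void)memmove(to, from, byte_count);  // overlap-safe
  } else {
    (void)memcpy(to, from, byte_count);   // disjoint: straight copy is fine
  }
}
#endif
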
#ifdef USE_INLINE_ASM

//--------------------------------------------------------------
// Atomic copying. Atomicity is given by the minimum of source
// and target alignment. Refer to mail comm with Tim Slegel/IBM.
// Only usable for disjoint source and target.
//--------------------------------------------------------------
#define MOVE8_ATOMIC_4(_to,_from) { \
  unsigned long toaddr; \
  unsigned long fromaddr; \
  asm( \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
    "MVC 0(32,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [toaddr] "=a" (toaddr) \
    , [fromaddr] "=a" (fromaddr) \
    : \
    : "cc" /* clobbered */ \
  ); \
}
#define MOVE8_ATOMIC_3(_to,_from) { \
  unsigned long toaddr; \
  unsigned long fromaddr; \
  asm( \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
    "MVC 0(24,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [toaddr] "=a" (toaddr) \
    , [fromaddr] "=a" (fromaddr) \
    : \
    : "cc" /* clobbered */ \
  ); \
}
#define MOVE8_ATOMIC_2(_to,_from) { \
  unsigned long toaddr; \
  unsigned long fromaddr; \
  asm( \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
    "MVC 0(16,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [toaddr] "=a" (toaddr) \
    , [fromaddr] "=a" (fromaddr) \
    : \
    : "cc" /* clobbered */ \
  ); \
}
#define MOVE8_ATOMIC_1(_to,_from) { \
  unsigned long toaddr; \
  unsigned long fromaddr; \
  asm( \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LG %[fromaddr],%[from] \n\t" /* address of from area */ \
    "MVC 0(8,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [toaddr] "=a" (toaddr) \
    , [fromaddr] "=a" (fromaddr) \
    : \
    : "cc" /* clobbered */ \
  ); \
}

//--------------------------------------------------------------
// Atomic copying of 8-byte entities.
// Conjoint/disjoint property does not matter. Entities are first
// loaded and then stored.
// _to and _from must be 8-byte aligned.
//--------------------------------------------------------------
#define COPY8_ATOMIC_4(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 3,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LMG 0,3,0(3) \n\t" /* load data */ \
    "STMG 0,3,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
  ); \
}
#define COPY8_ATOMIC_3(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 2,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LMG 0,2,0(2) \n\t" /* load data */ \
    "STMG 0,2,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1", "r2" /* clobbered */ \
  ); \
}
#define COPY8_ATOMIC_2(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 1,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LMG 0,1,0(1) \n\t" /* load data */ \
    "STMG 0,1,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1" /* clobbered */ \
  ); \
}
#define COPY8_ATOMIC_1(_to,_from) { \
  unsigned long addr; \
  asm( \
    "LG %[addr],%[from] \n\t" /* address of from area */ \
    "LG 0,0(0,%[addr]) \n\t" /* load data */ \
    "LG %[addr],%[to] \n\t" /* address of to area */ \
    "STG 0,0(0,%[addr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [addr] "=a" (addr) /* inputs */ \
    : \
    : "cc", "r0" /* clobbered */ \
  ); \
}

//--------------------------------------------------------------
// Atomic copying of 4-byte entities.
// Exactly 4 (four) entities are copied.
// Conjoint/disjoint property does not matter. Entities are first
// loaded and then stored.
// _to and _from must be 4-byte aligned.
//--------------------------------------------------------------
#define COPY4_ATOMIC_4(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 3,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LM 0,3,0(3) \n\t" /* load data */ \
    "STM 0,3,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
  ); \
}
#define COPY4_ATOMIC_3(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 2,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LM 0,2,0(2) \n\t" /* load data */ \
    "STM 0,2,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1", "r2" /* clobbered */ \
  ); \
}
#define COPY4_ATOMIC_2(_to,_from) { \
  unsigned long toaddr; \
  asm( \
    "LG 1,%[from] \n\t" /* address of from area */ \
    "LG %[toaddr],%[to] \n\t" /* address of to area */ \
    "LM 0,1,0(1) \n\t" /* load data */ \
    "STM 0,1,0(%[toaddr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [toaddr] "=a" (toaddr) /* inputs */ \
    : \
    : "cc", "r0", "r1" /* clobbered */ \
  ); \
}
#define COPY4_ATOMIC_1(_to,_from) { \
  unsigned long addr; \
  asm( \
    "LG %[addr],%[from] \n\t" /* address of from area */ \
    "L 0,0(0,%[addr]) \n\t" /* load data */ \
    "LG %[addr],%[to] \n\t" /* address of to area */ \
    "ST 0,0(0,%[addr]) \n\t" /* store data */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    , [addr] "=a" (addr) /* inputs */ \
    : \
    : "cc", "r0" /* clobbered */ \
  ); \
}

#if 0 // Waiting for gcc to support EXRL.
#define MVC_MEMCOPY(_to,_from,_len) \
if (VM_Version::has_ExecuteExtensions()) { \
  asm("\t" \
    " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \
    " EXRL 1,1f \n\t" /* execute MVC instr */ \
    " BRC 15,2f \n\t" /* skip template */ \
    "1: MVC 0(%[len],%[to]),0(%[from]) \n\t" \
    "2: BCR 0,0 \n\t" \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    : [len] "r" (_len) /* inputs */ \
    : "cc", "r1" /* clobbered */ \
  ); \
} else { \
  asm("\t" \
    " LARL 2,3f \n\t" \
    " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \
    " EX 1,0(2) \n\t" /* execute MVC instr */ \
    " BRC 15,4f \n\t" /* skip template */ \
    "3: MVC 0(%[len],%[to]),0(%[from]) \n\t" \
    "4: BCR 0,0 \n\t" \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    : [len] "r" (_len) /* inputs */ \
    : "cc", "r1", "r2" /* clobbered */ \
  ); \
}
#else
#define MVC_MEMCOPY(_to,_from,_len) \
{ unsigned long toaddr; unsigned long tolen; \
  unsigned long fromaddr; unsigned long target; \
  asm("\t" \
    " LTGR %[tolen],%[len] \n\t" /* decr for MVC */ \
    " BRC 8,2f \n\t" /* do nothing for l=0*/ \
    " AGHI %[tolen],-1 \n\t" \
    " LG %[toaddr],%[to] \n\t" \
    " LG %[fromaddr],%[from] \n\t" \
    " LARL %[target],1f \n\t" /* addr of MVC instr */ \
    " EX %[tolen],0(%[target]) \n\t" /* execute MVC instr */ \
    " BRC 15,2f \n\t" /* skip template */ \
    "1: MVC 0(1,%[toaddr]),0(%[fromaddr]) \n\t" \
    "2: BCR 0,0 \n\t" /* nop a branch target*/\
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [tolen] "=a" (tolen) \
    , [toaddr] "=a" (toaddr) \
    , [fromaddr] "=a" (fromaddr) \
    , [target] "=a" (target) \
    : [len] "r" (_len) /* inputs */ \
    : "cc" /* clobbered */ \
  ); \
}
#endif

#if 0 // code snippet to be used for debugging
    /* ASSERT code BEGIN */ \
    " LARL %[len],5f \n\t" \
    " LARL %[mta],4f \n\t" \
    " SLGR %[len],%[mta] \n\t" \
    " CGHI %[len],16 \n\t" \
    " BRC 7,9f \n\t" /* block size != 16 */ \
    \
    " LARL %[len],1f \n\t" \
    " SLGR %[len],%[mta] \n\t" \
    " CGHI %[len],256 \n\t" \
    " BRC 7,9f \n\t" /* list len != 256 */ \
    \
    " LGR 0,0 \n\t" /* artificial SIGILL */ \
    "9: BRC 7,-2 \n\t" \
    " LARL %[mta],1f \n\t" /* restore MVC table begin */ \
    /* ASSERT code END */
#endif

// Optimized copying for data less than 4k
// - no destructive overlap
// - 0 <= _n_bytes <= 4096
// This macro needs to be gcc-compiled with -march=z990. Otherwise, the
// LAY instruction is not available.
#define MVC_MULTI(_to,_from,_n_bytes) \
{ unsigned long toaddr; \
  unsigned long fromaddr; \
  unsigned long movetable; \
  unsigned long len; \
  asm("\t" \
    " LTGFR %[len],%[nby] \n\t" \
    " LG %[ta],%[to] \n\t" /* address of to area */ \
    " BRC 8,1f \n\t" /* nothing to copy */ \
    \
    " NILL %[nby],255 \n\t" /* # bytes mod 256 */ \
    " LG %[fa],%[from] \n\t" /* address of from area */ \
    " BRC 8,3f \n\t" /* no rest, skip copying */ \
    \
    " LARL %[mta],2f \n\t" /* MVC template addr */ \
    " AHI %[nby],-1 \n\t" /* adjust for EX MVC */ \
    \
    " EX %[nby],0(%[mta]) \n\t" /* only rightmost */ \
    /* 8 bits of nby used */ \
    /* Since nby is <= 4096 on entry to this code, we need no */ \
    /* zero extension before using it in addr calc. */ \
    " LA %[fa],1(%[nby],%[fa]) \n\t"/* adjust from addr */ \
    " LA %[ta],1(%[nby],%[ta]) \n\t"/* adjust to addr */ \
    \
    "3: SRAG %[nby],%[len],8 \n\t" /* # cache lines */ \
    " LARL %[mta],1f \n\t" /* MVC table begin */ \
    " BRC 8,1f \n\t" /* nothing to copy */ \
    \
    /* Insert ASSERT code here if required. */ \
    \
    \
    " LNGFR %[nby],%[nby] \n\t" /* negative offset into */ \
    " SLLG %[nby],%[nby],4 \n\t" /* MVC table 16-byte blocks */ \
    " BC 15,0(%[nby],%[mta]) \n\t" /* branch to block #ncl */ \
    \
    "2: MVC 0(1,%[ta]),0(%[fa]) \n\t" /* MVC template */ \
    \
    "4: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 4096 == l */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    "5: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3840 <= l < 4096 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3584 <= l < 3840 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3328 <= l < 3584 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3072 <= l < 3328 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2816 <= l < 3072 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2560 <= l < 2816 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2304 <= l < 2560 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2048 <= l < 2304 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1792 <= l < 2048 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1536 <= l < 1792 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1280 <= l < 1536 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1024 <= l < 1280 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 768 <= l < 1024 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 512 <= l < 768 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 256 <= l < 512 */ \
    " LAY %[ta],256(0,%[ta]) \n\t" \
    " LA %[fa],256(0,%[fa]) \n\t" \
    "1: BCR 0,0 \n\t" /* nop as branch target */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) \
    , [ta] "=a" (toaddr) \
    , [fa] "=a" (fromaddr) \
    , [mta] "=a" (movetable) \
    , [nby] "+a" (_n_bytes) \
    , [len] "=a" (len) \
    : \
    : "cc" /* clobbered */ \
  ); \
}

#define MVCLE_MEMCOPY(_to,_from,_len) \
  asm( \
    " LG 0,%[to] \n\t" /* address of to area */ \
    " LG 2,%[from] \n\t" /* address of from area */ \
    " LGR 1,%[len] \n\t" /* len of to area */ \
    " LGR 3,%[len] \n\t" /* len of from area */ \
    "1: MVCLE 0,2,176 \n\t" /* copy storage, bypass cache (0xb0) */ \
    " BRC 1,1b \n\t" /* retry if interrupted */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [from] "+Q" (_from) /* outputs */ \
    : [len] "r" (_len) /* inputs */ \
    : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \
  );

#define MVCLE_MEMINIT(_to,_val,_len) \
  asm( \
    " LG 0,%[to] \n\t" /* address of to area */ \
    " LGR 1,%[len] \n\t" /* len of to area */ \
    " XGR 3,3 \n\t" /* from area len = 0 */ \
    "1: MVCLE 0,2,0(%[val]) \n\t" /* init storage */ \
    " BRC 1,1b \n\t" /* retry if interrupted */ \
    : [to] "+Q" (_to) /* outputs */ \
    : [len] "r" (_len) /* inputs */ \
    , [val] "r" (_val) /* inputs */ \
    : "cc", "r0", "r1", "r3" /* clobbered */ \
  );
#define MVCLE_MEMZERO(_to,_len) \
  asm( \
    " LG 0,%[to] \n\t" /* address of to area */ \
    " LGR 1,%[len] \n\t" /* len of to area */ \
    " XGR 3,3 \n\t" /* from area len = 0 */ \
    "1: MVCLE 0,2,0 \n\t" /* clear storage */ \
    " BRC 1,1b \n\t" /* retry if interrupted */ \
    : [to] "+Q" (_to) /* outputs */ \
    : [len] "r" (_len) /* inputs */ \
    : "cc", "r0", "r1", "r3" /* clobbered */ \
  );

// Clear a stretch of memory, 0 <= _len <= 256.
// There is no alignment prereq.
// There is no test for len out of range specified above.
#define XC_MEMZERO_256(_to,_len) \
{ unsigned long toaddr; unsigned long tolen; \
  unsigned long target; \
  asm("\t" \
    " LTGR %[tolen],%[len] \n\t" /* decr for MVC */ \
    " BRC 8,2f \n\t" /* do nothing for l=0*/ \
    " AGHI %[tolen],-1 \n\t" /* adjust for EX XC */ \
    " LARL %[target],1f \n\t" /* addr of XC instr */ \
    " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
    " EX %[tolen],0(%[target]) \n\t" /* execute MVC instr */ \
    " BRC 15,2f \n\t" /* skip template */ \
    "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \
    "2: BCR 0,0 \n\t" /* nop a branch target*/\
    : [to] "+Q" (_to) /* outputs */ \
    , [tolen] "=a" (tolen) \
    , [toaddr] "=a" (toaddr) \
    , [target] "=a" (target) \
    : [len] "r" (_len) /* inputs */ \
    : "cc" /* clobbered */ \
  ); \
}

// Clear a stretch of memory, 256 < _len.
// XC_MEMZERO_256 may be used to clear shorter areas.
//
// The code
// - first zeroes a few bytes to align on a HeapWord.
//   This step is currently inactive because all calls seem
//   to have their data aligned on HeapWord boundaries.
// - then zeroes a few HeapWords to align on a cache line.
// - then zeroes entire cache lines in a loop.
// - then zeroes the remaining (partial) cache line.
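// A minimal illustrative sketch (assumption: plain C++, 256-byte cache lines;
// sketch_zero_any is not used anywhere below): the same decomposition as the
// XC-based macro that follows, with memset standing in for the XC instructions.
#if 0
static void sketch_zero_any(jbyte* to, size_t len) {
  const size_t line = 256;
  // Zero a few bytes to align on a cache line.
  size_t head = (line - ((size_t)to & (line - 1))) & (line - 1);
  if (head > len) { head = len; }
  memset(to, 0, head);
  to += head; len -= head;
  // Zero entire cache lines in a loop.
  while (len >= line) { memset(to, 0, line); to += line; len -= line; }
  // Zero the remaining (partial) cache line.
  memset(to, 0, len);
}
#endif
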
#if 1
#define XC_MEMZERO_ANY(_to,_len) \
{ unsigned long toaddr; unsigned long tolen; \
  unsigned long len8; unsigned long len256; \
  unsigned long target; unsigned long lenx; \
  asm("\t" \
    " LTGR %[tolen],%[len] \n\t" /* */ \
    " BRC 8,2f \n\t" /* do nothing for l=0*/ \
    " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
    " LARL %[target],1f \n\t" /* addr of XC instr */ \
    " " \
    " LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\
    " NILL %[len256],0xff \n\t" \
    " BRC 8,4f \n\t" /* already aligned */ \
    " NILH %[len256],0x00 \n\t" /* zero extend */ \
    " LLGFR %[len256],%[len256] \n\t" \
    " LAY %[lenx],-1(,%[len256]) \n\t" \
    " EX %[lenx],0(%[target]) \n\t" /* execute MVC instr */ \
    " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" \
    " SGR %[tolen],%[len256] \n\t" /* adjust len */ \
    " " \
    "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \
    " BRC 8,6f \n\t" /* no full cache lines */ \
    "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" \
    " LA %[toaddr],256(,%[toaddr]) \n\t" \
    " BRCTG %[lenx],5b \n\t" /* iterate */ \
    " " \
    "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \
    " BRC 8,2f \n\t" /* done if none */ \
    " LAY %[lenx],-1(,%[tolen]) \n\t" \
    " EX %[lenx],0(%[target]) \n\t" /* execute MVC instr */ \
    " BRC 15,2f \n\t" /* skip template */ \
    " " \
    "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \
    "2: BCR 0,0 \n\t" /* nop a branch target */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [lenx] "=a" (lenx) \
    , [len256] "=a" (len256) \
    , [tolen] "=a" (tolen) \
    , [toaddr] "=a" (toaddr) \
    , [target] "=a" (target) \
    : [len] "r" (_len) /* inputs */ \
    : "cc" /* clobbered */ \
  ); \
}
#else
#define XC_MEMZERO_ANY(_to,_len) \
{ unsigned long toaddr; unsigned long tolen; \
  unsigned long len8; unsigned long len256; \
  unsigned long target; unsigned long lenx; \
  asm("\t" \
    " LTGR %[tolen],%[len] \n\t" /* */ \
    " BRC 8,2f \n\t" /* do nothing for l=0*/ \
    " LG %[toaddr],%[to] \n\t" /* addr of data area */ \
    " LARL %[target],1f \n\t" /* addr of XC instr */ \
    " " \
    " LCGR %[len8],%[toaddr] \n\t" /* HeapWord alignment */ \
    " NILL %[len8],0x07 \n\t" \
    " BRC 8,3f \n\t" /* already aligned */ \
    " NILH %[len8],0x00 \n\t" /* zero extend */ \
    " LLGFR %[len8],%[len8] \n\t" \
    " LAY %[lenx],-1(,%[len8]) \n\t" \
    " EX %[lenx],0(%[target]) \n\t" /* execute MVC instr */ \
    " LA %[toaddr],0(%[len8],%[toaddr]) \n\t" \
    " SGR %[tolen],%[len8] \n\t" /* adjust len */ \
    " " \
    "3: LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\
    " NILL %[len256],0xff \n\t" \
    " BRC 8,4f \n\t" /* already aligned */ \
    " NILH %[len256],0x00 \n\t" /* zero extend */ \
    " LLGFR %[len256],%[len256] \n\t" \
    " LAY %[lenx],-1(,%[len256]) \n\t" \
    " EX %[lenx],0(%[target]) \n\t" /* execute MVC instr */ \
    " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" \
    " SGR %[tolen],%[len256] \n\t" /* adjust len */ \
    " " \
    "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \
    " BRC 8,6f \n\t" /* no full cache lines */ \
    "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" \
    " LA %[toaddr],256(,%[toaddr]) \n\t" \
    " BRCTG %[lenx],5b \n\t" /* iterate */ \
    " " \
    "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \
    " BRC 8,2f \n\t" /* done if none */ \
    " LAY %[lenx],-1(,%[tolen]) \n\t" \
    " EX %[lenx],0(%[target]) \n\t" /* execute MVC instr */ \
    " BRC 15,2f \n\t" /* skip template */ \
    " " \
    "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \
    "2: BCR 0,0 \n\t" /* nop a branch target */ \
    : [to] "+Q" (_to) /* outputs */ \
    , [lenx] "=a" (lenx) \
    , [len8] "=a" (len8) \
    , [len256] "=a" (len256) \
    , [tolen] "=a" (tolen) \
    , [toaddr] "=a" (toaddr) \
    , [target] "=a" (target) \
    : [len] "r" (_len) /* inputs */ \
    : "cc" /* clobbered */ \
  ); \
}
#endif
#endif // USE_INLINE_ASM

//*************************************//
//   D I S J O I N T   C O P Y I N G   //
//*************************************//

static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  // JVM2008: very frequent, some tests frequent.

  // Copy HeapWord (=DW) aligned storage. Use MVCLE in inline-asm code.
  // MVCLE guarantees DW concurrent (i.e. atomic) accesses if both the addresses of the operands
  // are DW aligned and the length is an integer multiple of a DW. Should always be true here.
  //
  // No special exploit needed. H/W discovers suitable situations itself.
  //
  // For large chunks of memory, exploit special H/W support of z/Architecture:
  // 1) copy short piece of memory to page-align address(es)
  // 2) copy largest part (all contained full pages) of memory using mvcle instruction.
  //    z/Architecture processors have special H/W support for page-aligned storage
  //    where len is an int multiple of page size. In that case, up to 4 cache lines are
  //    processed in parallel and L1 cache is not polluted.
  // 3) copy the remaining piece of memory.
  //
#ifdef USE_INLINE_ASM
  jbyte* to_bytes = (jbyte*)to;
  jbyte* from_bytes = (jbyte*)from;
  size_t len_bytes = count*HeapWordSize;

  // Optimized copying for data less than 4k
  switch (count) {
    case 0: return;
    case 1: MOVE8_ATOMIC_1(to,from)
            return;
    case 2: MOVE8_ATOMIC_2(to,from)
            return;
    // case 3: MOVE8_ATOMIC_3(to,from)
    //         return;
    // case 4: MOVE8_ATOMIC_4(to,from)
    //         return;
    default:
      if (len_bytes <= 4096) {
        MVC_MULTI(to,from,len_bytes)
        return;
      }
      // else
      MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
      return;
  }
#else
  // Fallback code.
  switch (count) {
    case 0:
      return;

    case 1:
      *to = *from;
      return;

    case 2:
      *to++ = *from++;
      *to = *from;
      return;

    case 3:
      *to++ = *from++;
      *to++ = *from++;
      *to = *from;
      return;

    case 4:
      *to++ = *from++;
      *to++ = *from++;
      *to++ = *from++;
      *to = *from;
      return;

    default:
      while (count-- > 0)
        *(to++) = *(from++);
      return;
  }
#endif
}

static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
  // JVM2008: < 4k calls.
  assert(((((size_t)from) & 0x07L) | (((size_t)to) & 0x07L)) == 0, "No atomic copy w/o aligned data");
  pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
}

static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  // JVM2008: very rare.
  pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate.
}


//*************************************//
//   C O N J O I N T   C O P Y I N G   //
//*************************************//

static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  // JVM2008: between some and lower end of frequent.

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
    switch (count_in) {
      case 4: COPY8_ATOMIC_4(to,from)
              return;
      case 3: COPY8_ATOMIC_3(to,from)
              return;
      case 2: COPY8_ATOMIC_2(to,from)
              return;
      case 1: COPY8_ATOMIC_1(to,from)
              return;
      case 0: return;
      default:
        from += count_in;
        to += count_in;
        while (count_in-- > 0)
          *(--to) = *(--from); // Copy backwards, areas overlap destructively.
        return;
    }
  }
  // else
  jbyte* to_bytes = (jbyte*)to;
  jbyte* from_bytes = (jbyte*)from;
  size_t len_bytes = count_in*BytesPerLong;
  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
  return;
#else
  // Fallback code.
  if (has_destructive_overlap((char*)from, (char*)to, count*BytesPerLong)) {
    HeapWord t1, t2, t3;
    switch (count) {
      case 0:
        return;

      case 1:
        *to = *from;
        return;

      case 2:
        t1 = *(from+1);
        *to = *from;
        *(to+1) = t1;
        return;

      case 3:
        t1 = *(from+1);
        t2 = *(from+2);
        *to = *from;
        *(to+1) = t1;
        *(to+2) = t2;
        return;

      case 4:
        t1 = *(from+1);
        t2 = *(from+2);
        t3 = *(from+3);
        *to = *from;
        *(to+1) = t1;
        *(to+2) = t2;
        *(to+3) = t3;
        return;

      default:
        from += count;
        to += count;
        while (count-- > 0)
          *(--to) = *(--from); // Copy backwards, areas overlap destructively.
        return;
    }
  }
  // else
  // Just delegate. HeapWords are optimally aligned anyway.
  pd_aligned_disjoint_words(from, to, count);
#endif
}

static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {

  // Just delegate. HeapWords are optimally aligned anyway.
  pd_aligned_conjoint_words(from, to, count);
}

static void pd_conjoint_bytes(const void* from, void* to, size_t count) {

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in))
    (void)memmove(to, from, count_in);
  else {
    jbyte* to_bytes = (jbyte*)to;
    jbyte* from_bytes = (jbyte*)from;
    size_t len_bytes = count_in;
    MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
  }
#else
  if (has_destructive_overlap((char*)from, (char*)to, count))
    (void)memmove(to, from, count);
  else
    (void)memcpy(to, from, count);
#endif
}

//**************************************************//
//   C O N J O I N T  A T O M I C   C O P Y I N G   //
//**************************************************//

static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  // Call arraycopy stubs to do the job.
  pd_conjoint_bytes(from, to, count); // bytes are always accessed atomically.
}

static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((const char*)from, (char*)to, count_in*BytesPerShort)) {
    // Use optimizations from shared code where no z-specific optimization exists.
    copy_conjoint_jshorts_atomic(from, to, count);
  } else {
    jbyte* to_bytes = (jbyte*)to;
    jbyte* from_bytes = (jbyte*)from;
    size_t len_bytes = count_in*BytesPerShort;
    MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
  }
#else
  // Use optimizations from shared code where no z-specific optimization exists.
  copy_conjoint_jshorts_atomic(from, to, count);
#endif
}

static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((const char*)from, (char*)to, count_in*BytesPerInt)) {
    switch (count_in) {
      case 4: COPY4_ATOMIC_4(to,from)
              return;
      case 3: COPY4_ATOMIC_3(to,from)
              return;
      case 2: COPY4_ATOMIC_2(to,from)
              return;
      case 1: COPY4_ATOMIC_1(to,from)
              return;
      case 0: return;
      default:
        // Use optimizations from shared code where no z-specific optimization exists.
        copy_conjoint_jints_atomic(from, to, count_in);
        return;
    }
  }
  // else
  jbyte* to_bytes = (jbyte*)to;
  jbyte* from_bytes = (jbyte*)from;
  size_t len_bytes = count_in*BytesPerInt;
  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
#else
  // Use optimizations from shared code where no z-specific optimization exists.
  copy_conjoint_jints_atomic(from, to, count);
#endif
}

static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
    switch (count_in) {
      case 4: COPY8_ATOMIC_4(to,from) return;
      case 3: COPY8_ATOMIC_3(to,from) return;
      case 2: COPY8_ATOMIC_2(to,from) return;
      case 1: COPY8_ATOMIC_1(to,from) return;
      case 0: return;
      default:
        from += count_in;
        to += count_in;
        while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
        return;
    }
  }
  // else {
  jbyte* to_bytes = (jbyte*)to;
  jbyte* from_bytes = (jbyte*)from;
  size_t len_bytes = count_in*BytesPerLong;
  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
#else
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) {
    if (count_in < 8) {
      from += count_in;
      to += count_in;
      while (count_in-- > 0)
        *(--to) = *(--from); // Copy backwards, areas overlap destructively.
      return;
    }
    // else {
    from += count_in-1;
    to += count_in-1;
    if (count_in&0x01) {
      *(to--) = *(from--);
      count_in--;
    }
    for (; count_in>0; count_in-=2) {
      *to = *from;
      *(to-1) = *(from-1);
      to -= 2;
      from -= 2;
    }
  }
  else
    pd_aligned_disjoint_words((const HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
#endif
}

static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {

#ifdef USE_INLINE_ASM
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
    switch (count_in) {
      case 4: COPY8_ATOMIC_4(to,from) return;
      case 3: COPY8_ATOMIC_3(to,from) return;
      case 2: COPY8_ATOMIC_2(to,from) return;
      case 1: COPY8_ATOMIC_1(to,from) return;
      case 0: return;
      default:
        from += count_in;
        to += count_in;
        while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively.
        return;
    }
  }
  // else
  jbyte* to_bytes = (jbyte*)to;
  jbyte* from_bytes = (jbyte*)from;
  size_t len_bytes = count_in*BytesPerOop;
  MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes)
#else
  size_t count_in = count;
  if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) {
    from += count_in;
    to += count_in;
    while (count_in-- > 0) *(--to) = *(--from); // Copy backwards, areas overlap destructively.
    return;
  }
  // else
  pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate.
  return;
#endif
}

static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_bytes_atomic(from, to, count);
}

static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_jshorts_atomic((const jshort*)from, (jshort*)to, count);
}

static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
}

static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
}

static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count);
}

//**********************************************//
//  M E M O R Y   I N I T I A L I S A T I O N   //
//**********************************************//

static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
  // JVM2008: very rare, only in some tests.
#ifdef USE_INLINE_ASM
  // Initialize storage to a given value. Use memset instead of copy loop.
  // For large chunks of memory, exploit special H/W support of z/Architecture:
  // 1) init short piece of memory to page-align address
  // 2) init largest part (all contained full pages) of memory using mvcle instruction.
  //    z/Architecture processors have special H/W support for page-aligned storage
  //    where len is an int multiple of page size. In that case, up to 4 cache lines are
  //    processed in parallel and L1 cache is not polluted.
  // 3) init the remaining piece of memory.
  // Atomicity cannot really be an issue since gcc implements the loop body with XC anyway.
  // If atomicity is a problem, we have to prevent gcc optimization. Best workaround: inline asm.

  jbyte* to_bytes = (jbyte*)to;
  size_t len_bytes = count;

  MVCLE_MEMINIT(to_bytes, value, len_bytes)

#else
  // Memset does the best job possible: loop over 256-byte MVCs, with
  // the last MVC EXecuted. With the -mmvcle option, initialization
  // is done using MVCLE -> slight advantage for large areas.
  (void)memset(to, value, count);
#endif
}

static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
  // Occurs in dbg builds only. Usually memory poisoning with BAADBABE, DEADBEEF, etc.
  // JVM2008: < 4k calls.
  if (value == 0) {
    pd_zero_to_words(tohw, count);
    return;
  }
  if (value == ~(juint)(0)) {
    pd_fill_to_bytes(tohw, count*HeapWordSize, (jubyte)(~(juint)(0)));
    return;
  }
  julong* to = (julong*) tohw;
  julong v = ((julong) value << 32) | value;
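  // E.g. value == 0xDEADBEEF yields v == 0xDEADBEEFDEADBEEF, i.e. the 32-bit
  // pattern replicated into both halves of each stored doubleword.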
  while (count-- > 0) {
    *to++ = v;
  }
}

static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
  // JVM2008: very frequent, but virtually all calls are with value == 0.
  pd_fill_to_words(tohw, count, value);
}

//**********************************//
//  M E M O R Y   C L E A R I N G   //
//**********************************//

// Delegate to pd_zero_to_bytes. It also works HeapWord-atomic.
// Distinguish between simple and large zero_to_words.
static void pd_zero_to_words(HeapWord* tohw, size_t count) {
  pd_zero_to_bytes(tohw, count*HeapWordSize);
}

static void pd_zero_to_bytes(void* to, size_t count) {
  // JVM2008: some calls (generally), some tests frequent
#ifdef USE_INLINE_ASM
  // Even zero_to_bytes() requires HeapWord-atomic, or, at least, sequential
  // zeroing of the memory. MVCLE is not fit for that job:
  //   "As observed by other CPUs and by the channel subsystem,
  //    that portion of the first operand which is filled
  //    with the padding byte is not necessarily stored into in
  //    a left-to-right direction and may appear to be stored
  //    into more than once."
  // Therefore, implementation was changed to use (multiple) XC instructions.

  const long line_size = 256;
  jbyte* to_bytes = (jbyte*)to;
  size_t len_bytes = count;

  if (len_bytes <= line_size) {
    XC_MEMZERO_256(to_bytes, len_bytes);
  } else {
    XC_MEMZERO_ANY(to_bytes, len_bytes);
  }

#else
  // Memset does the best job possible: loop over 256-byte MVCs, with
  // the last MVC EXecuted. With the -mmvcle option, initialization
  // is done using MVCLE -> slight advantage for large areas.
  (void)memset(to, 0, count);
#endif
}

#endif // CPU_S390_COPY_S390_HPP