PojavLauncherTeam/jdk17u
Path: blob/master/src/hotspot/cpu/x86/assembler_x86.hpp
/*
 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_X86_ASSEMBLER_X86_HPP
#define CPU_X86_ASSEMBLER_X86_HPP

#include "asm/register.hpp"
#include "utilities/powerOfTwo.hpp"

class BiasedLockingCounters;

// Contains all the definitions needed for x86 assembly code generation.

// Calling convention
class Argument {
 public:
  enum {
#ifdef _LP64
#ifdef _WIN64
    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ...)
    n_int_register_returns_c      = 1, // rax
    n_float_register_returns_c    = 1, // xmm0
#else
    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ...)
    n_int_register_returns_c      = 2, // rax, rdx
    n_float_register_returns_c    = 2, // xmm0, xmm1
#endif // _WIN64
    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
#else
    n_register_parameters = 0          // 0 registers used to pass arguments
#endif // _LP64
  };
};
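// Illustrative note (not part of the original header): with the counts above,
// a C call taking five ints keeps all five in registers on Linux/Solaris
// (rdi, rsi, rdx, rcx, r8) but spills the fifth to the stack on Win64, which
// has only four integer argument registers (rcx, rdx, r8, r9).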

#ifdef _LP64
// Symbolically name the register arguments used by the C calling convention.
// Windows is different from linux/solaris. So much for standards...

#ifdef _WIN64

REGISTER_DECLARATION(Register, c_rarg0, rcx);
REGISTER_DECLARATION(Register, c_rarg1, rdx);
REGISTER_DECLARATION(Register, c_rarg2, r8);
REGISTER_DECLARATION(Register, c_rarg3, r9);

REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);

#else

REGISTER_DECLARATION(Register, c_rarg0, rdi);
REGISTER_DECLARATION(Register, c_rarg1, rsi);
REGISTER_DECLARATION(Register, c_rarg2, rdx);
REGISTER_DECLARATION(Register, c_rarg3, rcx);
REGISTER_DECLARATION(Register, c_rarg4, r8);
REGISTER_DECLARATION(Register, c_rarg5, r9);

REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);

#endif // _WIN64

// Symbolically name the register arguments used by the Java calling convention.
// We have control over the convention for Java so we can do what we please.
// What pleases us is to offset the Java calling convention so that when we
// call a suitable JNI method the arguments are lined up and we don't have to
// do much shuffling. A suitable JNI method is non-static and takes a small
// number of arguments (two fewer args on windows).
//
// |-------------------------------------------------------|
// | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
// |-------------------------------------------------------|
// | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
// | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
// |-------------------------------------------------------|
// | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
// |-------------------------------------------------------|

REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
// Windows runs out of register args here
#ifdef _WIN64
REGISTER_DECLARATION(Register, j_rarg3, rdi);
REGISTER_DECLARATION(Register, j_rarg4, rsi);
#else
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
#endif /* _WIN64 */
REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
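// Illustrative note (not in the original header): because j_rarg0 == c_rarg1,
// a call into a non-static JNI method only needs to materialize the JNIEnv*
// in c_rarg0; the receiver and the first few Java arguments are already
// sitting in the correct C argument registers.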

REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);

REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile
REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile

REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
REGISTER_DECLARATION(Register, r15_thread, r15);   // callee-saved

#else
// rscratch1 will appear in 32bit code that is dead but of course must compile.
// Using noreg ensures that if the dead code is incorrectly live and executed
// it will cause an assertion failure.
#define rscratch1 noreg
#define rscratch2 noreg

#endif // _LP64

// JSR 292
// On x86, the SP does not have to be saved when invoking method handle intrinsics
// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);

// Address is an abstraction used to represent a memory location
// using any of the amd64 addressing modes with one object.
//
// Note: A register location is represented via a Register, not
//       via an address for efficiency & simplicity reasons.

class ArrayAddress;

class Address {
 public:
  enum ScaleFactor {
    no_scale = -1,
    times_1  =  0,
    times_2  =  1,
    times_4  =  2,
    times_8  =  3,
    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
  };
  static ScaleFactor times(int size) {
    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
    if (size == 8)  return times_8;
    if (size == 4)  return times_4;
    if (size == 2)  return times_2;
    return times_1;
  }
  static int scale_size(ScaleFactor scale) {
    assert(scale != no_scale, "");
    assert(((1 << (int)times_1) == 1 &&
            (1 << (int)times_2) == 2 &&
            (1 << (int)times_4) == 4 &&
            (1 << (int)times_8) == 8), "");
    return (1 << (int)scale);
  }
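  // Illustrative note (not in the original header): times(sizeof(jint))
  // yields times_4 and scale_size(times_4) gives back 4, so the two helpers
  // are inverses for the power-of-two operand sizes 1, 2, 4 and 8.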

 private:
  Register         _base;
  Register         _index;
  XMMRegister      _xmmindex;
  ScaleFactor      _scale;
  int              _disp;
  bool             _isxmmindex;
  RelocationHolder _rspec;

  // These constructors are easily misused, so keep them private.
  // %%% can we make these go away?
  NOT_LP64(Address(address loc, RelocationHolder spec);)
  Address(int disp, address loc, relocInfo::relocType rtype);
  Address(int disp, address loc, RelocationHolder spec);

 public:

  int disp() { return _disp; }
  // creation
  Address()
    : _base(noreg),
      _index(noreg),
      _xmmindex(xnoreg),
      _scale(no_scale),
      _disp(0),
      _isxmmindex(false) {
  }

  // No default displacement otherwise Register can be implicitly
  // converted to 0(Register) which is quite a different animal.

  Address(Register base, int disp)
    : _base(base),
      _index(noreg),
      _xmmindex(xnoreg),
      _scale(no_scale),
      _disp(disp),
      _isxmmindex(false) {
  }

  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
    : _base(base),
      _index(index),
      _xmmindex(xnoreg),
      _scale(scale),
      _disp(disp),
      _isxmmindex(false) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
    : _base(base),
      _index(index.register_or_noreg()),
      _xmmindex(xnoreg),
      _scale(scale),
      _disp(disp + (index.constant_or_zero() * scale_size(scale))),
      _isxmmindex(false) {
    if (!index.is_register()) scale = Address::no_scale;
    assert(!_index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
    : _base(base),
      _index(noreg),
      _xmmindex(index),
      _scale(scale),
      _disp(disp),
      _isxmmindex(true) {
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

  // The following overloads are used in connection with the
  // ByteSize type (see sizes.hpp). They simplify the use of
  // ByteSize'd arguments in assembly code.

  Address(Register base, ByteSize disp)
    : Address(base, in_bytes(disp)) {}

  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
    : Address(base, index, scale, in_bytes(disp)) {}

  Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
    : Address(base, index, scale, in_bytes(disp)) {}

  Address plus_disp(int disp) const {
    Address a = (*this);
    a._disp += disp;
    return a;
  }
  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
    Address a = (*this);
    a._disp += disp.constant_or_zero() * scale_size(scale);
    if (disp.is_register()) {
      assert(!a.index()->is_valid(), "competing indexes");
      a._index = disp.as_register();
      a._scale = scale;
    }
    return a;
  }
  bool is_same_address(Address a) const {
    // disregard _rspec
    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
  }

  // accessors
  bool        uses(Register reg) const { return _base == reg || _index == reg; }
  Register    base()             const { return _base; }
  Register    index()            const { return _index; }
  XMMRegister xmmindex()         const { return _xmmindex; }
  ScaleFactor scale()            const { return _scale; }
  int         disp()             const { return _disp; }
  bool        isxmmindex()       const { return _isxmmindex; }

  // Convert the raw encoding form into the form expected by the constructor for
  // Address. An index of 4 (rsp) corresponds to having no index, so convert
  // that to noreg for the Address constructor.
  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);

  static Address make_array(ArrayAddress);

 private:
  bool base_needs_rex() const {
    return _base->is_valid() && _base->encoding() >= 8;
  }

  bool index_needs_rex() const {
    return _index->is_valid() && _index->encoding() >= 8;
  }

  bool xmmindex_needs_rex() const {
    return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
  }

  relocInfo::relocType reloc() const { return _rspec.type(); }

  friend class Assembler;
  friend class MacroAssembler;
  friend class LIR_Assembler; // base/index/scale/disp
};
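// Illustrative examples (not in the original header) of the addressing modes
// Address can express:
//   Address(rbp, -8)                         // [rbp - 8]
//   Address(rax, rbx, Address::times_4, 16)  // [rax + rbx*4 + 16]
//   Address(rsi, xmm1, Address::times_8)     // [rsi + xmm1*8] (gather/scatter index)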

//
// AddressLiteral has been split out from Address because operands of this type
// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
// the few instructions that need to deal with address literals are unique and the
// MacroAssembler does not have to implement every instruction in the Assembler
// in order to search for address literals that may need special handling depending
// on the instruction and the platform. As a small step on the way to merging
// the i486/amd64 directories.
//
class AddressLiteral {
  friend class ArrayAddress;
  RelocationHolder _rspec;
  // Typically, when we use an AddressLiteral we want its rvalue.
  // However in some situations we want the lvalue (effective address) of the item.
  // We provide a special factory for making those lvalues.
  bool _is_lval;

  // If the target is far we'll need to load the effective address of this to
  // a register to reach it. Otherwise if near we can do rip
  // relative addressing.

  address          _target;

 protected:
  // creation
  AddressLiteral()
    : _is_lval(false),
      _target(NULL)
  {}

 public:


  AddressLiteral(address target, relocInfo::relocType rtype);

  AddressLiteral(address target, RelocationHolder const& rspec)
    : _rspec(rspec),
      _is_lval(false),
      _target(target)
  {}

  AddressLiteral addr() {
    AddressLiteral ret = *this;
    ret._is_lval = true;
    return ret;
  }


 private:

  address target() { return _target; }
  bool is_lval() { return _is_lval; }

  relocInfo::relocType reloc() const { return _rspec.type(); }
  const RelocationHolder& rspec() const { return _rspec; }

  friend class Assembler;
  friend class MacroAssembler;
  friend class Address;
  friend class LIR_Assembler;
};

// Convenience classes
class RuntimeAddress: public AddressLiteral {

 public:

  RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}

};

class ExternalAddress: public AddressLiteral {
 private:
  static relocInfo::relocType reloc_for_target(address target) {
    // Sometimes ExternalAddress is used for values which aren't
    // exactly addresses, like the card table base.
    // external_word_type can't be used for values in the first page
    // so just skip the reloc in that case.
    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
  }

 public:

  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}

};

class InternalAddress: public AddressLiteral {

 public:

  InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}

};
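// Illustrative guide (not in the original header) to the convenience classes:
// RuntimeAddress is for calls into runtime stubs, ExternalAddress for data
// living outside the code cache (e.g. the card table base), and
// InternalAddress for a location inside the current code blob.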

// x86 can do array addressing as a single operation since disp can be an
// absolute address; amd64 can't. We create a class that expresses the concept
// but does extra magic on amd64 to get the final result.

class ArrayAddress {
 private:

  AddressLiteral _base;
  Address        _index;

 public:

  ArrayAddress() {};
  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
  AddressLiteral base() { return _base; }
  Address index() { return _index; }

};

class InstructionAttr;

// The 64-bit value reflects the fxsave size, which is 512 bytes, plus the new
// xsave area on EVEX, which is another 2176 bytes (512 + 2176 = 2688).
// See the fxsave and xsave (EVEX enabled) documentation for the layout.
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);

// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
// is what you get. The Assembler is generating code into a CodeBuffer.

class Assembler : public AbstractAssembler {
  friend class AbstractAssembler; // for the non-virtual hack
  friend class LIR_Assembler;     // as_Address()
  friend class StubGenerator;

 public:
  enum Condition { // The x86 condition codes used for conditional jumps/moves.
    zero         = 0x4,
    notZero      = 0x5,
    equal        = 0x4,
    notEqual     = 0x5,
    less         = 0xc,
    lessEqual    = 0xe,
    greater      = 0xf,
    greaterEqual = 0xd,
    below        = 0x2,
    belowEqual   = 0x6,
    above        = 0x7,
    aboveEqual   = 0x3,
    overflow     = 0x0,
    noOverflow   = 0x1,
    carrySet     = 0x2,
    carryClear   = 0x3,
    negative     = 0x8,
    positive     = 0x9,
    parity       = 0xa,
    noParity     = 0xb
  };
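  // Illustrative note (not in the original header): these values are the
  // condition nibble of the corresponding Jcc/SETcc/CMOVcc opcodes, so
  // jcc(equal, L) with equal == 0x4 assembles to jz (opcode 0x0F 0x84).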

  enum Prefix {
    // segment overrides
    CS_segment = 0x2e,
    SS_segment = 0x36,
    DS_segment = 0x3e,
    ES_segment = 0x26,
    FS_segment = 0x64,
    GS_segment = 0x65,

    REX        = 0x40,

    REX_B      = 0x41,
    REX_X      = 0x42,
    REX_XB     = 0x43,
    REX_R      = 0x44,
    REX_RB     = 0x45,
    REX_RX     = 0x46,
    REX_RXB    = 0x47,

    REX_W      = 0x48,

    REX_WB     = 0x49,
    REX_WX     = 0x4A,
    REX_WXB    = 0x4B,
    REX_WR     = 0x4C,
    REX_WRB    = 0x4D,
    REX_WRX    = 0x4E,
    REX_WRXB   = 0x4F,

    VEX_3bytes = 0xC4,
    VEX_2bytes = 0xC5,
    EVEX_4bytes = 0x62,
    Prefix_EMPTY = 0x0
  };
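  // Illustrative note (not in the original header): a REX prefix has the bit
  // layout 0100WRXB, so the names compose additively, e.g.
  // REX_WRXB == REX | W(0x8) | R(0x4) | X(0x2) | B(0x1) == 0x4F.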

  enum VexPrefix {
    VEX_B = 0x20,
    VEX_X = 0x40,
    VEX_R = 0x80,
    VEX_W = 0x80
  };

  enum ExexPrefix { // EVEX prefix bits
    EVEX_F  = 0x04,
    EVEX_V  = 0x08,
    EVEX_Rb = 0x10,
    EVEX_X  = 0x40,
    EVEX_Z  = 0x80
  };

  enum VexSimdPrefix {
    VEX_SIMD_NONE = 0x0,
    VEX_SIMD_66   = 0x1,
    VEX_SIMD_F3   = 0x2,
    VEX_SIMD_F2   = 0x3
  };

  enum VexOpcode {
    VEX_OPCODE_NONE  = 0x0,
    VEX_OPCODE_0F    = 0x1,
    VEX_OPCODE_0F_38 = 0x2,
    VEX_OPCODE_0F_3A = 0x3,
    VEX_OPCODE_MASK  = 0x1F
  };

  enum AvxVectorLen {
    AVX_128bit = 0x0,
    AVX_256bit = 0x1,
    AVX_512bit = 0x2,
    AVX_NoVec  = 0x4
  };
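  // Illustrative note (not in the original header): AVX_128bit selects xmm
  // operands, AVX_256bit ymm, and AVX_512bit zmm; the value is what ends up
  // in the VEX.L / EVEX.L'L vector-length field of the encoding.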

  enum EvexTupleType {
    EVEX_FV   = 0,
    EVEX_HV   = 4,
    EVEX_FVM  = 6,
    EVEX_T1S  = 7,
    EVEX_T1F  = 11,
    EVEX_T2   = 13,
    EVEX_T4   = 15,
    EVEX_T8   = 17,
    EVEX_HVM  = 18,
    EVEX_QVM  = 19,
    EVEX_OVM  = 20,
    EVEX_M128 = 21,
    EVEX_DUP  = 22,
    EVEX_ETUP = 23
  };

  enum EvexInputSizeInBits {
    EVEX_8bit  = 0,
    EVEX_16bit = 1,
    EVEX_32bit = 2,
    EVEX_64bit = 3,
    EVEX_NObit = 4
  };

  enum WhichOperand {
    // input to locate_operand, and format code for relocations
    imm_operand    = 0, // embedded 32-bit|64-bit immediate operand
    disp32_operand = 1, // embedded 32-bit displacement or address
    call32_operand = 2, // embedded 32-bit self-relative displacement
#ifndef _LP64
    _WhichOperand_limit = 3
#else
    narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
    _WhichOperand_limit = 4
#endif
  };

  // Comparison predicates for integral types & FP types when using SSE
  enum ComparisonPredicate {
    eq     = 0,
    lt     = 1,
    le     = 2,
    _false = 3,
    neq    = 4,
    nlt    = 5,
    nle    = 6,
    _true  = 7
  };

  // Comparison predicates for FP types when using AVX
  // O means ordered, U unordered. With an ordered predicate, any comparison
  // involving a NaN is false; with an unordered one, it is true.
  // S means signaling, Q non-signaling. A signaling predicate raises #IA on a NaN operand.
  enum ComparisonPredicateFP {
    EQ_OQ    = 0,
    LT_OS    = 1,
    LE_OS    = 2,
    UNORD_Q  = 3,
    NEQ_UQ   = 4,
    NLT_US   = 5,
    NLE_US   = 6,
    ORD_Q    = 7,
    EQ_UQ    = 8,
    NGE_US   = 9,
    NGT_US   = 0xA,
    FALSE_OQ = 0xB,
    NEQ_OQ   = 0xC,
    GE_OS    = 0xD,
    GT_OS    = 0xE,
    TRUE_UQ  = 0xF,
    EQ_OS    = 0x10,
    LT_OQ    = 0x11,
    LE_OQ    = 0x12,
    UNORD_S  = 0x13,
    NEQ_US   = 0x14,
    NLT_UQ   = 0x15,
    NLE_UQ   = 0x16,
    ORD_S    = 0x17,
    EQ_US    = 0x18,
    NGE_UQ   = 0x19,
    NGT_UQ   = 0x1A,
    FALSE_OS = 0x1B,
    NEQ_OS   = 0x1C,
    GE_OQ    = 0x1D,
    GT_OQ    = 0x1E,
    TRUE_US  = 0x1F
  };
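  // Illustrative note (not in the original header): EQ_OQ is the ordered,
  // non-signaling equality test, so comparing against a NaN yields false and
  // raises no exception, while EQ_UQ (unordered) yields true whenever either
  // operand is NaN.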

  enum Width {
    B = 0,
    W = 1,
    D = 2,
    Q = 3
  };

  //---<  calculate length of instruction  >---
  // As instruction size can't be found out easily on x86/x64,
  // we just use '4' for len and maxlen.
  // The instruction must start at the passed address.
  static unsigned int instr_len(unsigned char *instr) { return 4; }

  //---<  longest instructions  >---
  // Max instruction length is not specified in architecture documentation.
  // We could use a "safe enough" estimate (15), but just default to
  // the instruction length guess from above.
  static unsigned int instr_maxlen() { return 4; }

  // NOTE: The general philosophy of the declarations here is that 64bit versions
  // of instructions are freely declared without the need for wrapping them in an ifdef.
  // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
  // In the .cpp file the implementations are wrapped so that they are dropped out
  // of the resulting JVM. This is done mostly to keep the footprint of MINIMAL
  // to the size it was prior to merging up the 32bit and 64bit assemblers.
  //
  // This does mean you'll get a linker/runtime error if you use a 64bit-only
  // instruction in a 32bit VM. This is somewhat unfortunate but keeps the ifdef noise down.

 private:

  bool _legacy_mode_bw;
  bool _legacy_mode_dq;
  bool _legacy_mode_vl;
  bool _legacy_mode_vlbw;
  NOT_LP64(bool _is_managed;)

  class InstructionAttr *_attributes;

  // 64bit prefixes
  void prefix(Register reg);
  void prefix(Register dst, Register src, Prefix p);
  void prefix(Register dst, Address adr, Prefix p);

  void prefix(Address adr);
  void prefix(Address adr, Register reg, bool byteinst = false);
  void prefix(Address adr, XMMRegister reg);

  int prefix_and_encode(int reg_enc, bool byteinst = false);
  int prefix_and_encode(int dst_enc, int src_enc) {
    return prefix_and_encode(dst_enc, false, src_enc, false);
  }
  int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);

  // Some prefixq variants always emit exactly one prefix byte, so besides a
  // prefix-emitting method we provide a method to get the prefix byte to emit,
  // which can then be folded into a byte stream.
  int8_t get_prefixq(Address adr);
  int8_t get_prefixq(Address adr, Register reg);

  void prefixq(Address adr);
  void prefixq(Address adr, Register reg);
  void prefixq(Address adr, XMMRegister reg);

  int prefixq_and_encode(int reg_enc);
  int prefixq_and_encode(int dst_enc, int src_enc);

  void rex_prefix(Address adr, XMMRegister xreg,
                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
  int  rex_prefix_and_encode(int dst_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);

  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);

  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);

  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                  VexSimdPrefix pre, VexOpcode opc,
                  InstructionAttr *attributes);

  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc,
                             InstructionAttr *attributes);

  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                   VexOpcode opc, InstructionAttr *attributes);

  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
                             VexOpcode opc, InstructionAttr *attributes);

  // Helper functions for groups of instructions
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
  // Force generation of a 4-byte immediate value even if it fits into 8 bits
  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
  void emit_arith(int op1, int op2, Register dst, Register src);

  bool emit_compressed_disp_byte(int &disp);

  void emit_modrm(int mod, int dst_enc, int src_enc);
  void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
                        int disp);
  void emit_modrm_sib(int mod, int dst_enc, int src_enc,
                      Address::ScaleFactor scale, int index_enc, int base_enc);
  void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
                            Address::ScaleFactor scale, int index_enc, int base_enc,
                            int disp);

  void emit_operand_helper(int reg_enc,
                           int base_enc, int index_enc, Address::ScaleFactor scale,
                           int disp,
                           RelocationHolder const& rspec,
                           int rip_relative_correction = 0);

  void emit_operand(Register reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec,
                    int rip_relative_correction = 0);

  void emit_operand(Register reg,
                    Register base, XMMRegister index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec);

  void emit_operand(XMMRegister xreg,
                    Register base, XMMRegister xindex, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec);

  void emit_operand(Register reg, Address adr,
                    int rip_relative_correction = 0);

  void emit_operand(XMMRegister reg,
                    Register base, Register index, Address::ScaleFactor scale,
                    int disp,
                    RelocationHolder const& rspec);

  void emit_operand(XMMRegister reg, Address adr);

  // Immediate-to-memory forms
  void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);

 protected:
#ifdef ASSERT
  void check_relocation(RelocationHolder const& rspec, int format);
#endif

  void emit_data(jint data, relocInfo::relocType rtype, int format);
  void emit_data(jint data, RelocationHolder const& rspec, int format);
  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);

  bool reachable(AddressLiteral adr) NOT_LP64({ return true;});

  // These are all easily abused and hence protected

  // 32BIT ONLY SECTION
#ifndef _LP64
  // Make these disappear in 64bit mode since they would never be correct
  void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
  void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY

  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
  void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY

  void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
#else
  // 64BIT ONLY SECTION
  void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY

  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);

  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
#endif // _LP64

  // These are unique in that we are ensured by the caller that the 32-bit
  // relative displacement in these instructions will always be able to reach
  // the potentially 64-bit address described by entry. Since they can take a
  // 64-bit address they don't have the 32 suffix like the other instructions
  // in this class.

  void call_literal(address entry, RelocationHolder const& rspec);
  void jmp_literal(address entry, RelocationHolder const& rspec);

  // Avoid using directly section
  // Instructions in this section are actually usable by anyone without danger
  // of failure but have performance issues that are addressed by enhanced
  // instructions which will do the proper thing based on the particular CPU.
  // We protect them because we don't trust you...

  // Don't use the following inc() and dec() methods directly. INC & DEC instructions
  // could cause a partial flag stall since they don't set the CF flag.
  // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
  // which call inc() & dec() or add() & sub() in accordance with
  // the product flag UseIncDec value.

  void decl(Register dst);
  void decl(Address dst);
  void decq(Address dst);

  void incl(Register dst);
  void incl(Address dst);
  void incq(Register dst);
  void incq(Address dst);
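  // Illustrative note (not in the original header): with -XX:+UseIncDec,
  // MacroAssembler::increment(reg) can emit incl/incq; with -XX:-UseIncDec it
  // emits addl/addq with an immediate of 1, which writes all flags (including
  // CF) and so avoids the partial flag stall described above.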

  // Newer CPUs require use of movsd and movss to avoid a partial register stall
  // when loading from memory. But for old Opteron use movlpd instead of movsd.
  // The selection is done in MacroAssembler::movdbl() and movflt().

  // Move Scalar Single-Precision Floating-Point Values
  void movss(XMMRegister dst, Address src);
  void movss(XMMRegister dst, XMMRegister src);
  void movss(Address dst, XMMRegister src);

  // Move Scalar Double-Precision Floating-Point Values
  void movsd(XMMRegister dst, Address src);
  void movsd(XMMRegister dst, XMMRegister src);
  void movsd(Address dst, XMMRegister src);
  void movlpd(XMMRegister dst, Address src);

  // Newer CPUs require use of movaps and movapd to avoid a partial register stall
  // when moving between registers.
  void movaps(XMMRegister dst, XMMRegister src);
  void movapd(XMMRegister dst, XMMRegister src);

  // End avoid using directly


  // Instruction prefixes
  void prefix(Prefix p);

 public:

  // Creation
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
    init_attributes();
  }

  // Decoding
  static address locate_operand(address inst, WhichOperand which);
  static address locate_next_instruction(address inst);

  // Utilities
  static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                         int cur_tuple_type, int in_size_in_bits, int cur_encoding);

  // Generic instructions
  // Does 32bit or 64bit as needed for the platform. In some sense these
  // belong in macro assembler but there is no need for both varieties to exist.

  void init_attributes(void);

  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
  void clear_attributes(void) { _attributes = NULL; }

  void set_managed(void) { NOT_LP64(_is_managed = true;) }
  void clear_managed(void) { NOT_LP64(_is_managed = false;) }
  bool is_managed(void) {
    NOT_LP64(return _is_managed;)
    LP64_ONLY(return false;) }

  void lea(Register dst, Address src);

  void mov(Register dst, Register src);

#ifdef _LP64
  // support caching the result of some routines

  // must be called before pusha(), popa(), vzeroupper() - checked with asserts
  static void precompute_instructions();

  void pusha_uncached();
  void popa_uncached();
#endif
  void vzeroupper_uncached();
  void decq(Register dst);

  void pusha();
  void popa();

  void pushf();
  void popf();

  void push(int32_t imm32);

  void push(Register src);

  void pop(Register dst);

  // These are dummies to prevent surprise implicit conversions to Register
  void push(void* v);
  void pop(void* v);

  // These do register sized moves/scans
  void rep_mov();
  void rep_stos();
  void rep_stosb();
  void repne_scan();
#ifdef _LP64
  void repne_scanl();
#endif

  // Vanilla instructions in lexical order

  void adcl(Address dst, int32_t imm32);
  void adcl(Address dst, Register src);
  void adcl(Register dst, int32_t imm32);
  void adcl(Register dst, Address src);
  void adcl(Register dst, Register src);

  void adcq(Register dst, int32_t imm32);
  void adcq(Register dst, Address src);
  void adcq(Register dst, Register src);

  void addb(Address dst, int imm8);
  void addw(Register dst, Register src);
  void addw(Address dst, int imm16);

  void addl(Address dst, int32_t imm32);
  void addl(Address dst, Register src);
  void addl(Register dst, int32_t imm32);
  void addl(Register dst, Address src);
  void addl(Register dst, Register src);

  void addq(Address dst, int32_t imm32);
  void addq(Address dst, Register src);
  void addq(Register dst, int32_t imm32);
  void addq(Register dst, Address src);
  void addq(Register dst, Register src);

#ifdef _LP64
  // Add Unsigned Integers with Carry Flag
  void adcxq(Register dst, Register src);

  // Add Unsigned Integers with Overflow Flag
  void adoxq(Register dst, Register src);
#endif

  void addr_nop_4();
  void addr_nop_5();
  void addr_nop_7();
  void addr_nop_8();

  // Add Scalar Double-Precision Floating-Point Values
  void addsd(XMMRegister dst, Address src);
  void addsd(XMMRegister dst, XMMRegister src);

  // Add Scalar Single-Precision Floating-Point Values
  void addss(XMMRegister dst, Address src);
  void addss(XMMRegister dst, XMMRegister src);

  // AES instructions
  void aesdec(XMMRegister dst, Address src);
  void aesdec(XMMRegister dst, XMMRegister src);
  void aesdeclast(XMMRegister dst, Address src);
  void aesdeclast(XMMRegister dst, XMMRegister src);
  void aesenc(XMMRegister dst, Address src);
  void aesenc(XMMRegister dst, XMMRegister src);
  void aesenclast(XMMRegister dst, Address src);
  void aesenclast(XMMRegister dst, XMMRegister src);
  // Vector AES instructions
  void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  void andw(Register dst, Register src);
  void andb(Address dst, Register src);

  void andl(Address dst, int32_t imm32);
  void andl(Register dst, int32_t imm32);
  void andl(Register dst, Address src);
  void andl(Register dst, Register src);
  void andl(Address dst, Register src);

  void andq(Address dst, int32_t imm32);
  void andq(Register dst, int32_t imm32);
  void andq(Register dst, Address src);
  void andq(Register dst, Register src);
  void andq(Address dst, Register src);

  // BMI instructions
  void andnl(Register dst, Register src1, Register src2);
  void andnl(Register dst, Register src1, Address src2);
  void andnq(Register dst, Register src1, Register src2);
  void andnq(Register dst, Register src1, Address src2);

  void blsil(Register dst, Register src);
  void blsil(Register dst, Address src);
  void blsiq(Register dst, Register src);
  void blsiq(Register dst, Address src);

  void blsmskl(Register dst, Register src);
  void blsmskl(Register dst, Address src);
  void blsmskq(Register dst, Register src);
  void blsmskq(Register dst, Address src);

  void blsrl(Register dst, Register src);
  void blsrl(Register dst, Address src);
  void blsrq(Register dst, Register src);
  void blsrq(Register dst, Address src);

  void bsfl(Register dst, Register src);
  void bsrl(Register dst, Register src);

#ifdef _LP64
  void bsfq(Register dst, Register src);
  void bsrq(Register dst, Register src);
#endif

  void bswapl(Register reg);

  void bswapq(Register reg);

  void call(Label& L, relocInfo::relocType rtype);
  void call(Register reg);  // push pc; pc <- reg
  void call(Address adr);   // push pc; pc <- adr

  void cdql();

  void cdqq();

  void cld();

  void clflush(Address adr);
  void clflushopt(Address adr);
  void clwb(Address adr);

  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, Address src);

  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, Address src);


  void cmpb(Address dst, int imm8);

  void cmpl(Address dst, int32_t imm32);

  void cmp(Register dst, int32_t imm32);
  void cmpl(Register dst, int32_t imm32);
  void cmpl(Register dst, Register src);
  void cmpl(Register dst, Address src);

  void cmpq(Address dst, int32_t imm32);
  void cmpq(Address dst, Register src);

  void cmpq(Register dst, int32_t imm32);
  void cmpq(Register dst, Register src);
  void cmpq(Register dst, Address src);

  // these are dummies used to catch attempts to convert NULL to Register
  void cmpl(Register dst, void* junk); // dummy
  void cmpq(Register dst, void* junk); // dummy

  void cmpw(Address dst, int imm16);

  void cmpxchg8 (Address adr);

  void cmpxchgb(Register reg, Address adr);
  void cmpxchgl(Register reg, Address adr);

  void cmpxchgq(Register reg, Address adr);
  void cmpxchgw(Register reg, Address adr);

  // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
  void comisd(XMMRegister dst, Address src);
  void comisd(XMMRegister dst, XMMRegister src);

  // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
  void comiss(XMMRegister dst, Address src);
  void comiss(XMMRegister dst, XMMRegister src);

  // Identify processor type and features
  void cpuid();

  // CRC32C
  void crc32(Register crc, Register v, int8_t sizeInBytes);
  void crc32(Register crc, Address adr, int8_t sizeInBytes);

  // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, Address src);

  // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
  void cvtsi2sdl(XMMRegister dst, Register src);
  void cvtsi2sdl(XMMRegister dst, Address src);
  void cvtsi2sdq(XMMRegister dst, Register src);
  void cvtsi2sdq(XMMRegister dst, Address src);

  // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
  void cvtsi2ssl(XMMRegister dst, Register src);
  void cvtsi2ssl(XMMRegister dst, Address src);
  void cvtsi2ssq(XMMRegister dst, Register src);
  void cvtsi2ssq(XMMRegister dst, Address src);

  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Values
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
  void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Values
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, Address src);

  // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
  void cvttsd2sil(Register dst, Address src);
  void cvttsd2sil(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, Address src);
  void cvttsd2siq(Register dst, XMMRegister src);

  // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
  void cvttss2sil(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);

  // Convert vector double to int
  void cvttpd2dq(XMMRegister dst, XMMRegister src);

  // Convert vector float and double
  void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
  void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);

  // Convert vector long to vector FP
  void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
  void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);

  // Evex casts with truncation
  void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
  void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);

  // Abs of packed Integer values
  void pabsb(XMMRegister dst, XMMRegister src);
  void pabsw(XMMRegister dst, XMMRegister src);
  void pabsd(XMMRegister dst, XMMRegister src);
  void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
  void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
  void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
  void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);

  // Divide Scalar Double-Precision Floating-Point Values
  void divsd(XMMRegister dst, Address src);
  void divsd(XMMRegister dst, XMMRegister src);

  // Divide Scalar Single-Precision Floating-Point Values
  void divss(XMMRegister dst, Address src);
  void divss(XMMRegister dst, XMMRegister src);

#ifndef _LP64
 private:

  void emit_farith(int b1, int b2, int i);

 public:
  void emms();

  void fabs();

  void fadd(int i);

  void fadd_d(Address src);
  void fadd_s(Address src);

  // "Alternate" versions of x87 instructions place result down in FPU
  // stack instead of on TOS

  void fadda(int i); // "alternate" fadd
  void faddp(int i = 1);

  void fchs();

  void fcom(int i);

  void fcomp(int i = 1);
  void fcomp_d(Address src);
  void fcomp_s(Address src);

  void fcompp();

  void fcos();

  void fdecstp();

  void fdiv(int i);
  void fdiv_d(Address src);
  void fdivr_s(Address src);
  void fdiva(int i);  // "alternate" fdiv
  void fdivp(int i = 1);

  void fdivr(int i);
  void fdivr_d(Address src);
  void fdiv_s(Address src);

  void fdivra(int i); // "alternate" reversed fdiv

  void fdivrp(int i = 1);

  void ffree(int i = 0);

  void fild_d(Address adr);
  void fild_s(Address adr);

  void fincstp();

  void finit();

  void fist_s (Address adr);
  void fistp_d(Address adr);
  void fistp_s(Address adr);

  void fld1();

  void fld_d(Address adr);
  void fld_s(Address adr);
  void fld_s(int index);

  void fldcw(Address src);

  void fldenv(Address src);

  void fldlg2();

  void fldln2();

  void fldz();

  void flog();
  void flog10();

  void fmul(int i);

  void fmul_d(Address src);
  void fmul_s(Address src);

  void fmula(int i);  // "alternate" fmul

  void fmulp(int i = 1);

  void fnsave(Address dst);

  void fnstcw(Address src);

  void fnstsw_ax();

  void fprem();
  void fprem1();

  void frstor(Address src);

  void fsin();

  void fsqrt();

  void fst_d(Address adr);
  void fst_s(Address adr);

  void fstp_d(Address adr);
  void fstp_d(int index);
  void fstp_s(Address adr);

  void fsub(int i);
  void fsub_d(Address src);
  void fsub_s(Address src);

  void fsuba(int i);  // "alternate" fsub

  void fsubp(int i = 1);

  void fsubr(int i);
  void fsubr_d(Address src);
  void fsubr_s(Address src);

  void fsubra(int i); // "alternate" reversed fsub

  void fsubrp(int i = 1);

  void ftan();

  void ftst();

  void fucomi(int i = 1);
  void fucomip(int i = 1);

  void fwait();

  void fxch(int i = 1);

  void fyl2x();
  void frndint();
  void f2xm1();
  void fldl2e();
#endif // !_LP64

  // operands that only take the original 32bit registers
  void emit_operand32(Register reg, Address adr);

  void fld_x(Address adr);  // extended-precision (80-bit) format
  void fstp_x(Address adr); // extended-precision (80-bit) format
  void fxrstor(Address src);
  void xrstor(Address src);

  void fxsave(Address dst);
  void xsave(Address dst);

  void hlt();

  void idivl(Register src);
  void divl(Register src); // Unsigned division

#ifdef _LP64
  void idivq(Register src);
#endif

  void imull(Register src);
  void imull(Register dst, Register src);
  void imull(Register dst, Register src, int value);
  void imull(Register dst, Address src, int value);
  void imull(Register dst, Address src);

#ifdef _LP64
  void imulq(Register dst, Register src);
  void imulq(Register dst, Register src, int value);
  void imulq(Register dst, Address src, int value);
  void imulq(Register dst, Address src);
  void imulq(Register dst);
#endif

  // jcc is the generic conditional branch generator, used both for branches
  // to run-time routines and for branches to labels. jcc takes a branch
  // opcode (cc) and a label (L) and generates either a backward branch or a
  // forward branch and links it to the label fixup chain. Usage:
  //
  // Label L;      // unbound label
  // jcc(cc, L);   // forward branch to unbound label
  // bind(L);      // bind label to the current pc
  // jcc(cc, L);   // backward branch to bound label
  // bind(L);      // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void jcc(Condition cc, Label& L, bool maybe_short = true);

  // Conditional jump to an 8-bit offset to L.
  // WARNING: be very careful using this for forward jumps. If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.

  // Use macro to record file and line number.
#define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)

  void jccb_0(Condition cc, Label& L, const char* file, int line);

  void jmp(Address entry);    // pc <- entry

  // Label operations & relative jumps (PPUM Appendix D)
  void jmp(Label& L, bool maybe_short = true); // unconditional jump to L

  void jmp(Register entry); // pc <- entry

  // Unconditional 8-bit offset jump to L.
  // WARNING: be very careful using this for forward jumps. If the label is
  // not bound within an 8-bit offset of this instruction, a run-time error
  // will occur.

  // Use macro to record file and line number.
#define jmpb(L) jmpb_0(L, __FILE__, __LINE__)

  void jmpb_0(Label& L, const char* file, int line);

  void ldmxcsr( Address src );

  void leal(Register dst, Address src);

  void leaq(Register dst, Address src);

  void lfence();

  void lock();
  void size_prefix();

  void lzcntl(Register dst, Register src);

#ifdef _LP64
  void lzcntq(Register dst, Register src);
#endif

  enum Membar_mask_bits {
    StoreStore = 1 << 3,
    LoadStore  = 1 << 2,
    StoreLoad  = 1 << 1,
    LoadLoad   = 1 << 0
  };
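  // Illustrative note (not in the original header): on x86's TSO memory
  // model only StoreLoad reordering can be observed, so
  // membar(Membar_mask_bits(StoreLoad)) is the case that must emit a real
  // fence (typically a locked instruction or mfence); the other three bits
  // need no code at the hardware level.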

  // Serializes memory and blows flags
  void membar(Membar_mask_bits order_constraint);

  void mfence();
  void sfence();

  // Moves

  void mov64(Register dst, int64_t imm64);
  void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);

  void movb(Address dst, Register src);
  void movb(Address dst, int imm8);
  void movb(Register dst, Address src);

  void movddup(XMMRegister dst, XMMRegister src);

  void kmovbl(KRegister dst, Register src);
  void kmovbl(Register dst, KRegister src);
  void kmovwl(KRegister dst, Register src);
  void kmovwl(KRegister dst, Address src);
  void kmovwl(Register dst, KRegister src);
  void kmovwl(Address dst, KRegister src);
  void kmovwl(KRegister dst, KRegister src);
  void kmovdl(KRegister dst, Register src);
  void kmovdl(Register dst, KRegister src);
  void kmovql(KRegister dst, KRegister src);
  void kmovql(Address dst, KRegister src);
  void kmovql(KRegister dst, Address src);
  void kmovql(KRegister dst, Register src);
  void kmovql(Register dst, KRegister src);

  void knotwl(KRegister dst, KRegister src);
  void knotql(KRegister dst, KRegister src);

  void kortestbl(KRegister dst, KRegister src);
  void kortestwl(KRegister dst, KRegister src);
  void kortestdl(KRegister dst, KRegister src);
  void kortestql(KRegister dst, KRegister src);

  void ktestq(KRegister src1, KRegister src2);
  void ktestd(KRegister src1, KRegister src2);

  void ktestql(KRegister dst, KRegister src);

  void movdl(XMMRegister dst, Register src);
  void movdl(Register dst, XMMRegister src);
  void movdl(XMMRegister dst, Address src);
  void movdl(Address dst, XMMRegister src);

  // Move Double Quadword
  void movdq(XMMRegister dst, Register src);
  void movdq(Register dst, XMMRegister src);

  // Move Aligned Double Quadword
  void movdqa(XMMRegister dst, XMMRegister src);
  void movdqa(XMMRegister dst, Address src);

  // Move Unaligned Double Quadword
  void movdqu(Address dst, XMMRegister src);
  void movdqu(XMMRegister dst, Address src);
  void movdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 256bit Vector
  void vmovdqu(Address dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, Address src);
  void vmovdqu(XMMRegister dst, XMMRegister src);

  // Move Unaligned 512bit Vector
  void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
  void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
  void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqul(Address dst, XMMRegister src, int vector_len);
  void evmovdqul(XMMRegister dst, Address src, int vector_len);
  void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquq(Address dst, XMMRegister src, int vector_len);
  void evmovdquq(XMMRegister dst, Address src, int vector_len);
  void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
  void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
  void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
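  // Illustrative note (not in the original header): the b/w/l/q suffix names
  // the element size (8/16/32/64 bits) used for EVEX masking. When a
  // KRegister mask is supplied, merge == true keeps the destination's
  // unselected elements, while merge == false zeroes them (EVEX merge- vs.
  // zero-masking).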
1540
1541
// Move lower 64bit to high 64bit in 128bit register
1542
void movlhps(XMMRegister dst, XMMRegister src);
1543
1544
void movl(Register dst, int32_t imm32);
1545
void movl(Address dst, int32_t imm32);
1546
void movl(Register dst, Register src);
1547
void movl(Register dst, Address src);
1548
void movl(Address dst, Register src);
1549
1550
// These dummies prevent using movl from converting a zero (like NULL) into Register
1551
// by giving the compiler two choices it can't resolve
1552
1553
void movl(Address dst, void* junk);
1554
void movl(Register dst, void* junk);
1555
1556
#ifdef _LP64
1557
void movq(Register dst, Register src);
1558
void movq(Register dst, Address src);
1559
void movq(Address dst, Register src);
1560
void movq(Address dst, int32_t imm32);
1561
void movq(Register dst, int32_t imm32);
1562
1563
// These dummies prevent using movq from converting a zero (like NULL) into Register
1564
// by giving the compiler two choices it can't resolve
1565
1566
void movq(Address dst, void* dummy);
1567
void movq(Register dst, void* dummy);
1568
#endif
1569
1570
// Move Quadword
1571
void movq(Address dst, XMMRegister src);
1572
void movq(XMMRegister dst, Address src);
1573
void movq(XMMRegister dst, XMMRegister src);
1574
void movq(Register dst, XMMRegister src);
1575
void movq(XMMRegister dst, Register src);
1576
1577
void movsbl(Register dst, Address src);
1578
void movsbl(Register dst, Register src);
1579
1580
#ifdef _LP64
1581
void movsbq(Register dst, Address src);
1582
void movsbq(Register dst, Register src);
1583
1584
// Move signed 32bit immediate to 64bit extending sign
1585
void movslq(Address dst, int32_t imm64);
1586
void movslq(Register dst, int32_t imm64);
1587
1588
void movslq(Register dst, Address src);
1589
void movslq(Register dst, Register src);
1590
void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1591
#endif
1592
1593
void movswl(Register dst, Address src);
1594
void movswl(Register dst, Register src);
1595
1596
#ifdef _LP64
1597
void movswq(Register dst, Address src);
1598
void movswq(Register dst, Register src);
1599
#endif
1600
1601
void movw(Address dst, int imm16);
1602
void movw(Register dst, Address src);
1603
void movw(Address dst, Register src);
1604
1605
void movzbl(Register dst, Address src);
1606
void movzbl(Register dst, Register src);
1607
1608
#ifdef _LP64
1609
void movzbq(Register dst, Address src);
1610
void movzbq(Register dst, Register src);
1611
#endif
1612
1613
void movzwl(Register dst, Address src);
1614
void movzwl(Register dst, Register src);
1615
1616
#ifdef _LP64
1617
void movzwq(Register dst, Address src);
1618
void movzwq(Register dst, Register src);
1619
#endif
1620
1621
// Unsigned multiply with RAX destination register
1622
void mull(Address src);
1623
void mull(Register src);
1624
1625
#ifdef _LP64
1626
void mulq(Address src);
1627
void mulq(Register src);
1628
void mulxq(Register dst1, Register dst2, Register src);
1629
#endif
1630
1631
// Multiply Scalar Double-Precision Floating-Point Values
1632
void mulsd(XMMRegister dst, Address src);
1633
void mulsd(XMMRegister dst, XMMRegister src);
1634
1635
// Multiply Scalar Single-Precision Floating-Point Values
1636
void mulss(XMMRegister dst, Address src);
1637
void mulss(XMMRegister dst, XMMRegister src);
1638
1639
void negl(Register dst);
1640
void negl(Address dst);
1641
1642
#ifdef _LP64
1643
void negq(Register dst);
1644
void negq(Address dst);
1645
#endif
1646
1647
void nop(int i = 1);
1648
1649
void notl(Register dst);
1650
1651
#ifdef _LP64
1652
void notq(Register dst);
1653
1654
void btsq(Address dst, int imm8);
1655
void btrq(Address dst, int imm8);
1656
#endif
1657
1658
void orw(Register dst, Register src);
1659
1660
void orl(Address dst, int32_t imm32);
1661
void orl(Register dst, int32_t imm32);
1662
void orl(Register dst, Address src);
1663
void orl(Register dst, Register src);
1664
void orl(Address dst, Register src);
1665
1666
void orb(Address dst, int imm8);
1667
void orb(Address dst, Register src);
1668
1669
void orq(Address dst, int32_t imm32);
1670
void orq(Address dst, Register src);
1671
void orq(Register dst, int32_t imm32);
1672
void orq(Register dst, Address src);
1673
void orq(Register dst, Register src);
1674
1675
// Pack with signed saturation
1676
void packsswb(XMMRegister dst, XMMRegister src);
1677
void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1678
void packssdw(XMMRegister dst, XMMRegister src);
1679
void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1680
1681
// Pack with unsigned saturation
1682
void packuswb(XMMRegister dst, XMMRegister src);
1683
void packuswb(XMMRegister dst, Address src);
1684
void packusdw(XMMRegister dst, XMMRegister src);
1685
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1686
void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1687
// Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

void pause();

// Undefined Instruction
void ud2();

// SSE4.2 string instructions
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
void pcmpestri(XMMRegister xmm1, Address src, int imm8);

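// Illustrative note (editor's addition): pcmpestri compares explicit-length
// strings whose lengths are taken implicitly from EAX and EDX; imm8 selects
// the element size, comparison mode and polarity, and the match index is
// returned in ECX.
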
void pcmpeqb(XMMRegister dst, XMMRegister src);
void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);

void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

void pcmpeqq(XMMRegister dst, XMMRegister src);
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

void pcmpgtq(XMMRegister dst, XMMRegister src);
void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src, int vec_enc);

// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
void pextrw(Address dst, XMMRegister src, int imm8);

// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);

// AVX insert
void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);

// Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);

// Sign extend moves
void pmovsxbd(XMMRegister dst, XMMRegister src);
void pmovsxbq(XMMRegister dst, XMMRegister src);
void pmovsxbw(XMMRegister dst, XMMRegister src);
void pmovsxwd(XMMRegister dst, XMMRegister src);
void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);

void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);

void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);

void evpmovdb(Address dst, XMMRegister src, int vector_len);

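// Illustrative note (editor's addition): these widen the low elements of the
// source, e.g. pmovzxbw zero-extends the low 8 bytes to 8 words while
// pmovsxbw sign-extends them; the evpmovwb/evpmovdb forms go the other way,
// truncating elements down to narrower ones.
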
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Multiply add accumulate
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

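// Illustrative note (editor's addition): pmaddwd multiplies corresponding
// signed words and adds each adjacent pair of 32-bit products into a packed
// dword, e.g. dst[0] = a[0]*b[0] + a[1]*b[1]; evpdpwssd (AVX512_VNNI) does
// the same multiply-add but accumulates the result into dst.
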
#ifndef _LP64 // no 32bit push/pop on amd64
void popl(Address dst);
#endif

#ifdef _LP64
void popq(Address dst);
void popq(Register dst);
#endif

void popcntl(Register dst, Address src);
void popcntl(Register dst, Register src);

void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);

#ifdef _LP64
void popcntq(Register dst, Address src);
void popcntq(Register dst, Register src);
#endif

// Prefetches (SSE, SSE2, 3DNOW only)

void prefetchnta(Address src);
void prefetchr(Address src);
void prefetcht0(Address src);
void prefetcht1(Address src);
void prefetcht2(Address src);
void prefetchw(Address src);

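// Illustrative note (editor's addition): prefetcht0/t1/t2 hint decreasing
// temporal locality (nearer to farther cache levels), prefetchnta requests a
// non-temporal fetch that avoids polluting the caches, and prefetchw (from
// 3DNOW, now the PREFETCHW extension) hints that the line will be written.
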
// Shuffle Bytes
void pshufb(XMMRegister dst, XMMRegister src);
void pshufb(XMMRegister dst, Address src);
void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Shuffle Packed Doublewords
void pshufd(XMMRegister dst, XMMRegister src, int mode);
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);

// Shuffle Packed High/Low Words
void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);

// Shuffle floats and doubles
void pshufps(XMMRegister, XMMRegister, int);
void pshufpd(XMMRegister, XMMRegister, int);
void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);

// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);

// Shift Right by bytes Logical DoubleQuadword Immediate
void psrldq(XMMRegister dst, int shift);
// Shift Left by bytes Logical DoubleQuadword Immediate
void pslldq(XMMRegister dst, int shift);

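// Illustrative note (editor's addition): the pshufd 'mode' immediate packs
// four 2-bit source selectors, one per destination dword; e.g.
//   __ pshufd(xmm0, xmm1, 0x1B);  // hypothetical: reverses the four dwords
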
// Logical Compare 128bit
void ptest(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, Address src);
// Logical Compare 256bit
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);

// Vector compare
void vptest(XMMRegister dst, XMMRegister src, int vector_len);

// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);

// Interleave Low Doublewords
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, Address src);

// Interleave Low Quadwords
void punpcklqdq(XMMRegister dst, XMMRegister src);

#ifndef _LP64 // no 32bit push/pop on amd64
void pushl(Address src);
#endif

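// Illustrative note (editor's addition): ptest is a non-destructive logical
// compare; it sets ZF when (dst AND src) is all zeros and CF when
// (src AND NOT dst) is all zeros, writing neither operand.
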
void pushq(Address src);

void rcll(Register dst, int imm8);

void rclq(Register dst, int imm8);

void rcrq(Register dst, int imm8);

void rcpps(XMMRegister dst, XMMRegister src);

void rcpss(XMMRegister dst, XMMRegister src);

void rdtsc();

void ret(int imm16);

void roll(Register dst);

void roll(Register dst, int imm8);

void rorl(Register dst);

void rorl(Register dst, int imm8);

#ifdef _LP64
void rolq(Register dst);
void rolq(Register dst, int imm8);
void rorq(Register dst);
void rorq(Register dst, int imm8);
void rorxq(Register dst, Register src, int imm8);
void rorxd(Register dst, Register src, int imm8);
#endif

void sahf();

void sall(Register dst, int imm8);
void sall(Register dst);
void sall(Address dst, int imm8);
void sall(Address dst);

void sarl(Address dst, int imm8);
void sarl(Address dst);
void sarl(Register dst, int imm8);
void sarl(Register dst);

#ifdef _LP64
void salq(Register dst, int imm8);
void salq(Register dst);
void salq(Address dst, int imm8);
void salq(Address dst);

void sarq(Address dst, int imm8);
void sarq(Address dst);
void sarq(Register dst, int imm8);
void sarq(Register dst);
#endif

void sbbl(Address dst, int32_t imm32);
void sbbl(Register dst, int32_t imm32);
void sbbl(Register dst, Address src);
void sbbl(Register dst, Register src);

void sbbq(Address dst, int32_t imm32);
void sbbq(Register dst, int32_t imm32);
void sbbq(Register dst, Address src);
void sbbq(Register dst, Register src);

void setb(Condition cc, Register dst);

void sete(Register dst);
void setl(Register dst);
void setne(Register dst);

void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

void pblendw(XMMRegister dst, XMMRegister src, int imm8);
void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);

void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
void sha1msg1(XMMRegister dst, XMMRegister src);
void sha1msg2(XMMRegister dst, XMMRegister src);
// xmm0 is implicit additional source to the following instruction.
void sha256rnds2(XMMRegister dst, XMMRegister src);
void sha256msg1(XMMRegister dst, XMMRegister src);
void sha256msg2(XMMRegister dst, XMMRegister src);

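// Illustrative note (editor's addition): sha256rnds2 performs two SHA-256
// rounds, taking the round's key/schedule words implicitly from xmm0, so a
// hypothetical emitter reloads xmm0 before each
//   __ sha256rnds2(state0, state1);
// style call.
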
void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8);
void shrdl(Register dst, Register src);
void shrdl(Register dst, Register src, int8_t imm8);

void shll(Register dst, int imm8);
void shll(Register dst);

void shlq(Register dst, int imm8);
void shlq(Register dst);

void shrl(Register dst, int imm8);
void shrl(Register dst);
void shrl(Address dst);
void shrl(Address dst, int imm8);

void shrq(Register dst, int imm8);
void shrq(Register dst);
void shrq(Address dst);
void shrq(Address dst, int imm8);

void smovl(); // QQQ generic?

// Compute Square Root of Scalar Double-Precision Floating-Point Value
void sqrtsd(XMMRegister dst, Address src);
void sqrtsd(XMMRegister dst, XMMRegister src);

void roundsd(XMMRegister dst, Address src, int32_t rmode);
void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);

// Compute Square Root of Scalar Single-Precision Floating-Point Value
void sqrtss(XMMRegister dst, Address src);
void sqrtss(XMMRegister dst, XMMRegister src);

void std();

void stmxcsr(Address dst);

void subl(Address dst, int32_t imm32);
void subl(Address dst, Register src);
void subl(Register dst, int32_t imm32);
void subl(Register dst, Address src);
void subl(Register dst, Register src);

void subq(Address dst, int32_t imm32);
void subq(Address dst, Register src);
void subq(Register dst, int32_t imm32);
void subq(Register dst, Address src);
void subq(Register dst, Register src);

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void subl_imm32(Register dst, int32_t imm32);
void subq_imm32(Register dst, int32_t imm32);

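// Illustrative note (editor's addition): sub normally chooses the shorter
// sign-extended 8-bit immediate encoding when the value fits; the *_imm32
// forms force the 4-byte encoding so the instruction has a fixed length,
// which code that later patches the immediate in place relies on.
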
// Subtract Scalar Double-Precision Floating-Point Values
void subsd(XMMRegister dst, Address src);
void subsd(XMMRegister dst, XMMRegister src);

// Subtract Scalar Single-Precision Floating-Point Values
void subss(XMMRegister dst, Address src);
void subss(XMMRegister dst, XMMRegister src);

void testb(Register dst, int imm8);
void testb(Address dst, int imm8);

void testl(Register dst, int32_t imm32);
void testl(Register dst, Register src);
void testl(Register dst, Address src);

void testq(Address dst, int32_t imm32);
void testq(Register dst, int32_t imm32);
void testq(Register dst, Register src);
void testq(Register dst, Address src);

// BMI - count trailing zeros
void tzcntl(Register dst, Register src);
void tzcntq(Register dst, Register src);

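// Illustrative note (editor's addition): unlike bsf, tzcnt (BMI1) is fully
// defined for a zero source: it returns the operand width (32 or 64), so
// callers need no separate zero check.
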
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
void ucomisd(XMMRegister dst, Address src);
void ucomisd(XMMRegister dst, XMMRegister src);

// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
void ucomiss(XMMRegister dst, Address src);
void ucomiss(XMMRegister dst, XMMRegister src);

void xabort(int8_t imm8);

void xaddb(Address dst, Register src);
void xaddw(Address dst, Register src);
void xaddl(Address dst, Register src);
void xaddq(Address dst, Register src);

void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);

void xchgb(Register reg, Address adr);
void xchgw(Register reg, Address adr);
void xchgl(Register reg, Address adr);
void xchgl(Register dst, Register src);

void xchgq(Register reg, Address adr);
void xchgq(Register dst, Register src);

void xend();

// Get Value of Extended Control Register
void xgetbv();

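// Illustrative note (editor's addition): with ECX = 0, xgetbv reads XCR0 into
// EDX:EAX; this is typically used together with CPUID's OSXSAVE bit to verify
// that the OS actually saves and restores YMM/ZMM state before enabling AVX
// code paths.
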
void xorl(Register dst, int32_t imm32);
void xorl(Address dst, int32_t imm32);
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
void xorl(Address dst, Register src);

void xorb(Address dst, Register src);
void xorb(Register dst, Address src);
void xorw(Register dst, Register src);

void xorq(Register dst, Address src);
void xorq(Address dst, int32_t imm32);
void xorq(Register dst, Register src);
void xorq(Register dst, int32_t imm32);
void xorq(Address dst, Register src);

void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0

// AVX 3-operands scalar instructions (encoded with VEX prefix)

void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vaddss(XMMRegister dst, XMMRegister nds, Address src);
void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vdivss(XMMRegister dst, XMMRegister nds, Address src);
void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmulss(XMMRegister dst, XMMRegister nds, Address src);
void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);

void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);

void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
void shrxq(Register dst, Register src1, Register src2);

void bzhiq(Register dst, Register src1, Register src2);

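// Illustrative note (editor's addition): the VEX-encoded scalar forms are
// non-destructive, e.g.
//   __ vaddsd(xmm0, xmm1, xmm2);  // hypothetical: xmm0 = xmm1 + xmm2
// nds supplies the first source and the pass-through upper bits of dst, so
// both inputs survive the operation.
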
//====================VECTOR ARITHMETIC=====================================
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);

// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
void addpd(XMMRegister dst, Address src);
void addps(XMMRegister dst, XMMRegister src);
void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Subtract Packed Floating-Point Values
void subpd(XMMRegister dst, XMMRegister src);
void subps(XMMRegister dst, XMMRegister src);
void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply Packed Floating-Point Values
void mulpd(XMMRegister dst, XMMRegister src);
void mulpd(XMMRegister dst, Address src);
void mulps(XMMRegister dst, XMMRegister src);
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Divide Packed Floating-Point Values
void divpd(XMMRegister dst, XMMRegister src);
void divps(XMMRegister dst, XMMRegister src);
void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Sqrt Packed Floating-Point Values
void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
void vsqrtpd(XMMRegister dst, Address src, int vector_len);
void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
void vsqrtps(XMMRegister dst, Address src, int vector_len);

// Round Packed Double-Precision Values
void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);

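// Illustrative note (editor's addition): the 231 suffix names the operand
// roles, so vfmadd231pd computes dst = nds * src + dst with a single rounding
// step, which is the shape a loop accumulating into dst wants.
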
// Bitwise Logical AND of Packed Floating-Point Values
void andpd(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

void unpckhpd(XMMRegister dst, XMMRegister src);
void unpcklpd(XMMRegister dst, XMMRegister src);

// Bitwise Logical XOR of Packed Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src);
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

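// Illustrative note (editor's addition): xor of a register with itself is the
// standard zeroing idiom, recognized by hardware as dependency-breaking, so
//   __ xorps(xmm0, xmm0);  // hypothetical: xmm0 = 0.0
// is preferred over loading a zero constant from memory.
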
// Add horizontal packed integers
void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void phaddw(XMMRegister dst, XMMRegister src);
void phaddd(XMMRegister dst, XMMRegister src);

// Add packed integers
void paddb(XMMRegister dst, XMMRegister src);
void paddw(XMMRegister dst, XMMRegister src);
void paddd(XMMRegister dst, XMMRegister src);
void paddd(XMMRegister dst, Address src);
void paddq(XMMRegister dst, XMMRegister src);
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Sub packed integers
void psubb(XMMRegister dst, XMMRegister src);
void psubw(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, XMMRegister src);
void psubq(XMMRegister dst, XMMRegister src);
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Multiply packed integers (shorts and ints; vpmullq for longs needs AVX-512DQ)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Minimum of packed integers
void pminsb(XMMRegister dst, XMMRegister src);
void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsw(XMMRegister dst, XMMRegister src);
void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pminsd(XMMRegister dst, XMMRegister src);
void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minps(XMMRegister dst, XMMRegister src);
void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void minpd(XMMRegister dst, XMMRegister src);
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

// Maximum of packed integers
void pmaxsb(XMMRegister dst, XMMRegister src);
void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsw(XMMRegister dst, XMMRegister src);
void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void pmaxsd(XMMRegister dst, XMMRegister src);
void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxps(XMMRegister dst, XMMRegister src);
void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
void maxpd(XMMRegister dst, XMMRegister src);
void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
void psllq(XMMRegister dst, int shift);
void psllw(XMMRegister dst, XMMRegister shift);
void pslld(XMMRegister dst, XMMRegister shift);
void psllq(XMMRegister dst, XMMRegister shift);
void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);

// Logical shift right packed integers
void psrlw(XMMRegister dst, int shift);
void psrld(XMMRegister dst, int shift);
void psrlq(XMMRegister dst, int shift);
void psrlw(XMMRegister dst, XMMRegister shift);
void psrld(XMMRegister dst, XMMRegister shift);
void psrlq(XMMRegister dst, XMMRegister shift);
void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Arithmetic shift right packed integers (only shorts and ints before AVX-512; evpsraq covers longs)
void psraw(XMMRegister dst, int shift);
void psrad(XMMRegister dst, int shift);
void psraw(XMMRegister dst, XMMRegister shift);
void psrad(XMMRegister dst, XMMRegister shift);
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift left packed integers
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift right packed integers
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

// Variable shift right arithmetic packed integers
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);

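// Illustrative note (editor's addition): the variable-shift forms (vpsllvd,
// vpsrlvd, vpsravd, ...) shift each element by the count held in the
// corresponding element of 'shift', rather than by one shared count.
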
// And packed integers
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Or packed integers
void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

// Ternary logic instruction.
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);

// Vector Rotate Left/Right instruction.
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);

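// Illustrative note (editor's addition): the vpternlog imm8 is a truth table
// indexed by the three source bits (dst acts as the first source), so any
// three-input boolean function is a single instruction; e.g. imm8 = 0x96
// computes dst ^ src2 ^ src3.
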
// vinserti forms
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// vinsertf forms
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

// vextracti forms
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);

// vextractf forms
void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);

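// Illustrative note (editor's addition): imm8 selects the lane, e.g.
//   __ vextracti128(xmm1, xmm0, 1);  // hypothetical: xmm1 = upper 128 bits
//                                    // of the 256-bit register aliased by xmm0
// and the 32x4/64x2/64x4 forms index 128- or 256-bit lanes of ZMM registers.
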
// xmm/mem sourced byte/word/dword/qword replicate
void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastq(XMMRegister dst, Address src, int vector_len);

void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);

// scalar single/double/128bit precision replicate
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);

// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

// Gather AVX2 and AVX3
void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);

// Scatter AVX3 only
void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);

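// Illustrative note (editor's addition): the mask selects which lanes are
// actually gathered or scattered, and the hardware clears mask lanes as they
// complete, which is what makes these instructions restartable after a fault
// partway through.
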
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transition penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();

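// Usage sketch (editor's addition, hypothetical emitter code):
//   __ vzeroupper();   // leave dirty AVX state before the call
//   __ call(...);      // e.g. into SSE-only runtime or native code
// so the callee's legacy SSE instructions do not pay the state-transition cost.
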
// Vector double compares
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             ComparisonPredicateFP comparison, int vector_len);

// Vector float compares
void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             ComparisonPredicateFP comparison, int vector_len);

// Vector integer compares
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, bool is_signed, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, bool is_signed, int vector_len);

// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, bool is_signed, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, bool is_signed, int vector_len);

// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, bool is_signed, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, bool is_signed, int vector_len);

// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
             int comparison, bool is_signed, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
             int comparison, bool is_signed, int vector_len);

void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);

// Vector blends
void blendvps(XMMRegister dst, XMMRegister src);
void blendvpd(XMMRegister dst, XMMRegister src);
void pblendvb(XMMRegister dst, XMMRegister src);
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
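// Illustrative note (editor's addition): the SSE blendv* forms take their
// selection mask implicitly in xmm0 and pick each element by the mask
// element's sign bit; the v* forms pass the mask explicitly, and the ev*
// forms select with a k-register instead.
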
protected:
// The next instructions require 16-byte address alignment in SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, Address src);
void xorps(XMMRegister dst, Address src);

};

// The Intel x86/AMD64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
// are applied.
class InstructionAttr {
public:
InstructionAttr(
int vector_len,    // The length of vector to be applied in encoding - for both AVX and EVEX
bool rex_vex_w,    // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
bool legacy_mode,  // If true, the instruction is restricted to AVX or earlier (legacy) encodings; otherwise EVEX may be chosen
bool no_reg_mask,  // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
bool uses_vl)      // This instruction may have legacy constraints based on vector length for EVEX
:
_rex_vex_w(rex_vex_w),
_legacy_mode(legacy_mode || UseAVX < 3),
_no_reg_mask(no_reg_mask),
_uses_vl(uses_vl),
_rex_vex_w_reverted(false),
_is_evex_instruction(false),
_is_clear_context(true),
_is_extended_context(false),
_avx_vector_len(vector_len),
_tuple_type(Assembler::EVEX_ETUP),
_input_size_in_bits(Assembler::EVEX_NObit),
_evex_encoding(0),
_embedded_opmask_register_specifier(0), // hard code k0
_current_assembler(NULL) { }

~InstructionAttr() {
if (_current_assembler != NULL) {
_current_assembler->clear_attributes();
}
_current_assembler = NULL;
}
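
// Usage sketch (editor's addition; values hypothetical, parameter roles as
// documented in the constructor above):
//   InstructionAttr attributes(AVX_128bit, /* rex_vex_w */ false,
//                              /* legacy_mode */ false, /* no_reg_mask */ true,
//                              /* uses_vl */ false);
//   attributes.set_is_evex_instruction();  // if the emitter chose EVEX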

private:
bool _rex_vex_w;
bool _legacy_mode;
bool _no_reg_mask;
bool _uses_vl;
bool _rex_vex_w_reverted;
bool _is_evex_instruction;
bool _is_clear_context;
bool _is_extended_context;
int _avx_vector_len;
int _tuple_type;
int _input_size_in_bits;
int _evex_encoding;
int _embedded_opmask_register_specifier;

Assembler *_current_assembler;

public:
// query functions for field accessors
bool is_rex_vex_w(void) const { return _rex_vex_w; }
bool is_legacy_mode(void) const { return _legacy_mode; }
bool is_no_reg_mask(void) const { return _no_reg_mask; }
bool uses_vl(void) const { return _uses_vl; }
bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
bool is_evex_instruction(void) const { return _is_evex_instruction; }
bool is_clear_context(void) const { return _is_clear_context; }
bool is_extended_context(void) const { return _is_extended_context; }
int get_vector_len(void) const { return _avx_vector_len; }
int get_tuple_type(void) const { return _tuple_type; }
int get_input_size(void) const { return _input_size_in_bits; }
int get_evex_encoding(void) const { return _evex_encoding; }
int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }

// Set the vector len manually
void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }

// Set revert rex_vex_w for avx encoding
void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }

// Set rex_vex_w based on state
void set_rex_vex_w(bool state) { _rex_vex_w = state; }

// Set the instruction to be encoded in AVX mode
void set_is_legacy_mode(void) { _legacy_mode = true; }

// Set the current instruction to be encoded as an EVEX instruction
void set_is_evex_instruction(void) { _is_evex_instruction = true; }

// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }

// When the Evex.Z field is set (true), it is used to clear all non-directed XMM/YMM/ZMM components.
// This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }

// Map back to current assembler so that we can manage object level association
void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }

// Address modifiers used for compressed displacement calculation
void set_address_attributes(int tuple_type, int input_size_in_bits);

// Set embedded opmask register specifier.
void set_embedded_opmask_register_specifier(KRegister mask) {
_embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
}

};

#endif // CPU_X86_ASSEMBLER_X86_HPP