CoCalc -- assembler

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/cpu/x86/vm/assembler_x86.hpp
³²²⁸⁵ views
1
/*
2
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 *
23
 */
24

25
#ifndef CPU_X86_VM_ASSEMBLER_X86_HPP
26
#define CPU_X86_VM_ASSEMBLER_X86_HPP
27

28
#include "asm/register.hpp"
29

30
class BiasedLockingCounters;
31

32
// Contains all the definitions needed for x86 assembly code generation.
33

34
// Calling convention
35
class Argument VALUE_OBJ_CLASS_SPEC {
36
 public:
37
  enum {
38
#ifdef _LP64
39
#ifdef _WIN64
40
    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
41
    n_float_register_parameters_c = 4,  // xmm0 - xmm3 (c_farg0, c_farg1, ... )
42
#else
43
    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
44
    n_float_register_parameters_c = 8,  // xmm0 - xmm7 (c_farg0, c_farg1, ... )
45
#endif // _WIN64
46
    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
47
    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
48
#else
49
    n_register_parameters = 0   // 0 registers used to pass arguments
50
#endif // _LP64
51
  };
52
};
53

54

55
#ifdef _LP64
56
// Symbolically name the register arguments used by the c calling convention.
57
// Windows is different from linux/solaris. So much for standards...
58

59
#ifdef _WIN64
60

61
REGISTER_DECLARATION(Register, c_rarg0, rcx);
62
REGISTER_DECLARATION(Register, c_rarg1, rdx);
63
REGISTER_DECLARATION(Register, c_rarg2, r8);
64
REGISTER_DECLARATION(Register, c_rarg3, r9);
65

66
REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
67
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
68
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
69
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
70

71
#else
72

73
REGISTER_DECLARATION(Register, c_rarg0, rdi);
74
REGISTER_DECLARATION(Register, c_rarg1, rsi);
75
REGISTER_DECLARATION(Register, c_rarg2, rdx);
76
REGISTER_DECLARATION(Register, c_rarg3, rcx);
77
REGISTER_DECLARATION(Register, c_rarg4, r8);
78
REGISTER_DECLARATION(Register, c_rarg5, r9);
79

80
REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
81
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
82
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
83
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
84
REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
85
REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
86
REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
87
REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
88

89
#endif // _WIN64
90

91
// Symbolically name the register arguments used by the Java calling convention.
92
// We have control over the convention for java so we can do what we please.
93
// What pleases us is to offset the java calling convention so that when
94
// we call a suitable jni method the arguments are lined up and we don't
95
// have to do little shuffling. A suitable jni method is non-static and a
96
// small number of arguments (two fewer args on windows)
97
//
98
//        |-------------------------------------------------------|
99
//        | c_rarg0   c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5    |
100
//        |-------------------------------------------------------|
101
//        | rcx       rdx      r8      r9      rdi*    rsi*       | windows (* not a c_rarg)
102
//        | rdi       rsi      rdx     rcx     r8      r9         | solaris/linux
103
//        |-------------------------------------------------------|
104
//        | j_rarg5   j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4    |
105
//        |-------------------------------------------------------|
106

107
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
108
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
109
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
110
// Windows runs out of register args here
111
#ifdef _WIN64
112
REGISTER_DECLARATION(Register, j_rarg3, rdi);
113
REGISTER_DECLARATION(Register, j_rarg4, rsi);
114
#else
115
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
116
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
117
#endif /* _WIN64 */
118
REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
119

120
REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
121
REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
122
REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
123
REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
124
REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
125
REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
126
REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
127
REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
128

129
REGISTER_DECLARATION(Register, rscratch1, r10);  // volatile
130
REGISTER_DECLARATION(Register, rscratch2, r11);  // volatile
131

132
REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
133
REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
134

135
#else
136
// The scratch register names appear in 32-bit code that is dead but must
// still compile. Mapping them to noreg ensures that, if such code is
// incorrectly live and executed, it causes an assertion failure.
#define rscratch1 noreg
#define rscratch2 noreg
141

142
#endif // _LP64
143

144
// JSR 292
145
// On x86, the SP does not have to be saved when invoking method handle intrinsics
146
// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
147
REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
148

149
// Address is an abstraction used to represent a memory location
150
// using any of the amd64 addressing modes with one object.
151
//
152
// Note: A register location is represented via a Register, not
153
//       via an address for efficiency & simplicity reasons.
154

155
class ArrayAddress;
156

157
class Address VALUE_OBJ_CLASS_SPEC {
158
 public:
159
  enum ScaleFactor {
160
    no_scale = -1,
161
    times_1  =  0,
162
    times_2  =  1,
163
    times_4  =  2,
164
    times_8  =  3,
165
    times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
166
  };
167
  static ScaleFactor times(int size) {
168
    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
169
    if (size == 8)  return times_8;
170
    if (size == 4)  return times_4;
171
    if (size == 2)  return times_2;
172
    return times_1;
173
  }
174
  static int scale_size(ScaleFactor scale) {
175
    assert(scale != no_scale, "");
176
    assert(((1 << (int)times_1) == 1 &&
177
            (1 << (int)times_2) == 2 &&
178
            (1 << (int)times_4) == 4 &&
179
            (1 << (int)times_8) == 8), "");
180
    return (1 << (int)scale);
181
  }
182

183
 private:
184
  Register         _base;
185
  Register         _index;
186
  ScaleFactor      _scale;
187
  int              _disp;
188
  RelocationHolder _rspec;
189

190
  // Easily misused constructors make them private
191
  // %%% can we make these go away?
192
  NOT_LP64(Address(address loc, RelocationHolder spec);)
193
  Address(int disp, address loc, relocInfo::relocType rtype);
194
  Address(int disp, address loc, RelocationHolder spec);
195

196
 public:
197

198
 int disp() { return _disp; }
199
  // creation
200
  Address()
201
    : _base(noreg),
202
      _index(noreg),
203
      _scale(no_scale),
204
      _disp(0) {
205
  }
206

207
  // No default displacement otherwise Register can be implicitly
208
  // converted to 0(Register) which is quite a different animal.
209

210
  Address(Register base, int disp)
211
    : _base(base),
212
      _index(noreg),
213
      _scale(no_scale),
214
      _disp(disp) {
215
  }
216

217
  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
218
    : _base (base),
219
      _index(index),
220
      _scale(scale),
221
      _disp (disp) {
222
    assert(!index->is_valid() == (scale == Address::no_scale),
223
           "inconsistent address");
224
  }
225

226
  Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
227
    : _base (base),
228
      _index(index.register_or_noreg()),
229
      _scale(scale),
230
      _disp (disp + (index.constant_or_zero() * scale_size(scale))) {
231
    if (!index.is_register())  scale = Address::no_scale;
232
    assert(!_index->is_valid() == (scale == Address::no_scale),
233
           "inconsistent address");
234
  }
235

236
  Address plus_disp(int disp) const {
237
    Address a = (*this);
238
    a._disp += disp;
239
    return a;
240
  }
241
  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
242
    Address a = (*this);
243
    a._disp += disp.constant_or_zero() * scale_size(scale);
244
    if (disp.is_register()) {
245
      assert(!a.index()->is_valid(), "competing indexes");
246
      a._index = disp.as_register();
247
      a._scale = scale;
248
    }
249
    return a;
250
  }
251
  bool is_same_address(Address a) const {
252
    // disregard _rspec
253
    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
254
  }
255

256
  // The following two overloads are used in connection with the
257
  // ByteSize type (see sizes.hpp).  They simplify the use of
258
  // ByteSize'd arguments in assembly code. Note that their equivalent
259
  // for the optimized build are the member functions with int disp
260
  // argument since ByteSize is mapped to an int type in that case.
261
  //
262
  // Note: DO NOT introduce similar overloaded functions for WordSize
263
  // arguments as in the optimized mode, both ByteSize and WordSize
264
  // are mapped to the same type and thus the compiler cannot make a
265
  // distinction anymore (=> compiler errors).
266

267
#ifdef ASSERT
268
  Address(Register base, ByteSize disp)
269
    : _base(base),
270
      _index(noreg),
271
      _scale(no_scale),
272
      _disp(in_bytes(disp)) {
273
  }
274

275
  Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
276
    : _base(base),
277
      _index(index),
278
      _scale(scale),
279
      _disp(in_bytes(disp)) {
280
    assert(!index->is_valid() == (scale == Address::no_scale),
281
           "inconsistent address");
282
  }
283

284
  Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
285
    : _base (base),
286
      _index(index.register_or_noreg()),
287
      _scale(scale),
288
      _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {
289
    if (!index.is_register())  scale = Address::no_scale;
290
    assert(!_index->is_valid() == (scale == Address::no_scale),
291
           "inconsistent address");
292
  }
293

294
#endif // ASSERT
295

296
  // accessors
297
  bool        uses(Register reg) const { return _base == reg || _index == reg; }
298
  Register    base()             const { return _base;  }
299
  Register    index()            const { return _index; }
300
  ScaleFactor scale()            const { return _scale; }
301
  int         disp()             const { return _disp;  }
302

303
  // Convert the raw encoding form into the form expected by the constructor for
304
  // Address.  An index of 4 (rsp) corresponds to having no index, so convert
305
  // that to noreg for the Address constructor.
306
  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
307

308
  static Address make_array(ArrayAddress);
309

310
 private:
311
  bool base_needs_rex() const {
312
    return _base != noreg && _base->encoding() >= 8;
313
  }
314

315
  bool index_needs_rex() const {
316
    return _index != noreg &&_index->encoding() >= 8;
317
  }
318

319
  relocInfo::relocType reloc() const { return _rspec.type(); }
320

321
  friend class Assembler;
322
  friend class MacroAssembler;
323
  friend class LIR_Assembler; // base/index/scale/disp
324
};
325

326
//
327
// AddressLiteral has been split out from Address because operands of this type
328
// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
329
// the few instructions that need to deal with address literals are unique and the
330
// MacroAssembler does not have to implement every instruction in the Assembler
331
// in order to search for address literals that may need special handling depending
332
// on the instruction and the platform. A small step on the way to merging i486/amd64
333
// directories.
334
//
335
class AddressLiteral VALUE_OBJ_CLASS_SPEC {
336
  friend class ArrayAddress;
337
  RelocationHolder _rspec;
338
  // Typically we use AddressLiterals we want to use their rval
339
  // However in some situations we want the lval (effect address) of the item.
340
  // We provide a special factory for making those lvals.
341
  bool _is_lval;
342

343
  // If the target is far we'll need to load the ea of this to
344
  // a register to reach it. Otherwise if near we can do rip
345
  // relative addressing.
346

347
  address          _target;
348

349
 protected:
350
  // creation
351
  AddressLiteral()
352
    : _is_lval(false),
353
      _target(NULL)
354
  {}
355

356
  public:
357

358

359
  AddressLiteral(address target, relocInfo::relocType rtype);
360

361
  AddressLiteral(address target, RelocationHolder const& rspec)
362
    : _rspec(rspec),
363
      _is_lval(false),
364
      _target(target)
365
  {}
366

367
  AddressLiteral addr() {
368
    AddressLiteral ret = *this;
369
    ret._is_lval = true;
370
    return ret;
371
  }
372

373

374
 private:
375

376
  address target() { return _target; }
377
  bool is_lval() { return _is_lval; }
378

379
  relocInfo::relocType reloc() const { return _rspec.type(); }
380
  const RelocationHolder& rspec() const { return _rspec; }
381

382
  friend class Assembler;
383
  friend class MacroAssembler;
384
  friend class Address;
385
  friend class LIR_Assembler;
386
};
387

388
// Convenience classes
389
class RuntimeAddress: public AddressLiteral {
390

391
  public:
392

393
  RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
394

395
};
396

397
class ExternalAddress: public AddressLiteral {
398
 private:
399
  static relocInfo::relocType reloc_for_target(address target) {
400
    // Sometimes ExternalAddress is used for values which aren't
401
    // exactly addresses, like the card table base.
402
    // external_word_type can't be used for values in the first page
403
    // so just skip the reloc in that case.
404
    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
405
  }
406

407
 public:
408

409
  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
410

411
};
412

413
class InternalAddress: public AddressLiteral {
414

415
  public:
416

417
  InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
418

419
};
420

421
// x86 can do array addressing as a single operation since disp can be an absolute
422
// address; amd64 can't. We create a class that expresses the concept but does extra
423
// magic on amd64 to get the final result
424

425
class ArrayAddress VALUE_OBJ_CLASS_SPEC {
426
  private:
427

428
  AddressLiteral _base;
429
  Address        _index;
430

431
  public:
432

433
  ArrayAddress() {};
434
  ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
435
  AddressLiteral base() { return _base; }
436
  Address index() { return _index; }
437

438
};
439

440
// Size of the FPU state in words: 27 on 32-bit builds, 512 / wordSize on
// 64-bit builds.
// NOTE(review): presumably these correspond to the FNSAVE image (27 * 4 bytes)
// and the 512-byte FXSAVE area respectively -- confirm.
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
441

442
// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
443
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
444
// is what you get. The Assembler is generating code into a CodeBuffer.
445

446
class Assembler : public AbstractAssembler  {
447
  friend class AbstractAssembler; // for the non-virtual hack
448
  friend class LIR_Assembler; // as_Address()
449
  friend class StubGenerator;
450

451
 public:
452
  // The x86 condition codes used for conditional jumps/moves, listed in
  // encoding order. Several names are synonyms for the same encoding and
  // are written as aliases of the first name.
  enum Condition {
    overflow      = 0x0,
    noOverflow    = 0x1,
    below         = 0x2,
    carrySet      = below,
    aboveEqual    = 0x3,
    carryClear    = aboveEqual,
    zero          = 0x4,
    equal         = zero,
    notZero       = 0x5,
    notEqual      = notZero,
    belowEqual    = 0x6,
    above         = 0x7,
    negative      = 0x8,
    positive      = 0x9,
    parity        = 0xa,
    noParity      = 0xb,
    less          = 0xc,
    greaterEqual  = 0xd,
    lessEqual     = 0xe,
    greater       = 0xf
  };
474

475
  // Instruction prefix bytes.
  enum Prefix {
    // segment overrides
    ES_segment = 0x26,
    CS_segment = 0x2e,
    SS_segment = 0x36,
    DS_segment = 0x3e,
    FS_segment = 0x64,
    GS_segment = 0x65,

    // REX prefixes: 0x40 plus one bit per letter in the name
    // (B = 0x1, X = 0x2, R = 0x4, W = 0x8).
    REX        = 0x40,

    REX_B      = REX | 0x1,
    REX_X      = REX | 0x2,
    REX_XB     = REX_X | REX_B,
    REX_R      = REX | 0x4,
    REX_RB     = REX_R | REX_B,
    REX_RX     = REX_R | REX_X,
    REX_RXB    = REX_RX | REX_B,

    REX_W      = REX | 0x8,

    REX_WB     = REX_W | REX_B,
    REX_WX     = REX_W | REX_X,
    REX_WXB    = REX_WX | REX_B,
    REX_WR     = REX_W | REX_R,
    REX_WRB    = REX_WR | REX_B,
    REX_WRX    = REX_WR | REX_X,
    REX_WRXB   = REX_WRX | REX_B,

    // First byte of the three-byte and two-byte VEX prefix forms.
    VEX_3bytes = 0xC4,
    VEX_2bytes = 0xC5
  };
507

508
  // Bit masks for the B, X, R and W fields of a VEX prefix.
  // VEX_R and VEX_W deliberately have the same value.
  // NOTE(review): presumably they are applied to different prefix bytes -- confirm.
  enum VexPrefix {
    VEX_B = 1 << 5,
    VEX_X = 1 << 6,
    VEX_R = 1 << 7,
    VEX_W = 1 << 7
  };
514

515
  // SIMD prefix selector passed to the VEX/SIMD prefix helpers.
  // NOTE(review): names suggest "no prefix", 66, F3 and F2 -- confirm
  // against the VEX encoding.
  enum VexSimdPrefix {
    VEX_SIMD_NONE = 0,
    VEX_SIMD_66   = 1,
    VEX_SIMD_F3   = 2,
    VEX_SIMD_F2   = 3
  };
521

522
  // Opcode selector passed to the VEX/SIMD prefix helpers.
  // NOTE(review): names suggest the 0F, 0F 38 and 0F 3A opcode maps -- confirm.
  enum VexOpcode {
    VEX_OPCODE_NONE  = 0,
    VEX_OPCODE_0F    = 1,
    VEX_OPCODE_0F_38 = 2,
    VEX_OPCODE_0F_3A = 3
  };
528

529
  // Input to locate_operand, and format code for relocations.
  // The narrow oop operand exists only on 64-bit builds, which also moves
  // the limit from 3 to 4.
  enum WhichOperand {
    imm_operand    = 0,          // embedded 32-bit|64-bit immediate operand
    disp32_operand = 1,          // embedded 32-bit displacement or address
    call32_operand = 2,          // embedded 32-bit self-relative displacement
#ifdef _LP64
    narrow_oop_operand  = 3,     // embedded 32-bit immediate narrow oop
    _WhichOperand_limit = 4
#else
    _WhichOperand_limit = 3
#endif
  };
541

542

543

544
  // NOTE: The general philosophy of the declarations here is that 64bit versions
545
  // of instructions are freely declared without the need for wrapping them in an ifdef.
546
  // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
547
  // In the .cpp file the implementations are wrapped so that they are dropped out
548
  // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
549
  // to the size it was prior to merging up the 32bit and 64bit assemblers.
550
  //
551
  // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
552
  // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
553

554
private:
555

556

557
  // 64bit prefixes
558
  int prefix_and_encode(int reg_enc, bool byteinst = false);
559
  int prefixq_and_encode(int reg_enc);
560

561
  int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
562
  int prefixq_and_encode(int dst_enc, int src_enc);
563

564
  void prefix(Register reg);
565
  void prefix(Address adr);
566
  void prefixq(Address adr);
567

568
  void prefix(Address adr, Register reg,  bool byteinst = false);
569
  void prefix(Address adr, XMMRegister reg);
570
  void prefixq(Address adr, Register reg);
571
  void prefixq(Address adr, XMMRegister reg);
572

573
  void prefetch_prefix(Address src);
574

575
  void rex_prefix(Address adr, XMMRegister xreg,
576
                  VexSimdPrefix pre, VexOpcode opc, bool rex_w);
577
  int  rex_prefix_and_encode(int dst_enc, int src_enc,
578
                             VexSimdPrefix pre, VexOpcode opc, bool rex_w);
579

580
  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
581
                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
582
                  bool vector256);
583

584
  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
585
                  VexSimdPrefix pre, VexOpcode opc,
586
                  bool vex_w, bool vector256);
587

588
  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
589
                  VexSimdPrefix pre, bool vector256 = false) {
590
    int dst_enc = dst->encoding();
591
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
592
    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
593
  }
594

595
  // General-register form with a memory source: VEX_OPCODE_0F_38,
  // no SIMD prefix, vex_w == false, vector256 == false.
  void vex_prefix_0F38(Register dst, Register nds, Address src) {
    const int nds_enc = nds->encoding();
    const int dst_enc = dst->encoding();
    vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, VEX_OPCODE_0F_38,
               /* vex_w */ false, /* vector256 */ false);
  }
601

602
  // Same as vex_prefix_0F38 but with vex_w == true.
  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
    const int nds_enc = nds->encoding();
    const int dst_enc = dst->encoding();
    vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, VEX_OPCODE_0F_38,
               /* vex_w */ true, /* vector256 */ false);
  }
608
  int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
609
                             VexSimdPrefix pre, VexOpcode opc,
610
                             bool vex_w, bool vector256);
611

612
  // General-register form with a register source: VEX_OPCODE_0F_38,
  // no SIMD prefix, vex_w == false, vector256 == false. Returns whatever the
  // encoding-based vex_prefix_and_encode returns.
  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
    const int dst_enc = dst->encoding();
    const int nds_enc = nds->encoding();
    const int src_enc = src->encoding();
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc,
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38,
                                 /* vex_w */ false, /* vector256 */ false);
  }
618
  // Same as vex_prefix_0F38_and_encode but with vex_w == true.
  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
    const int dst_enc = dst->encoding();
    const int nds_enc = nds->encoding();
    const int src_enc = src->encoding();
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc,
                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38,
                                 /* vex_w */ true, /* vector256 */ false);
  }
624
  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
625
                             VexSimdPrefix pre, bool vector256 = false,
626
                             VexOpcode opc = VEX_OPCODE_0F) {
627
    int src_enc = src->encoding();
628
    int dst_enc = dst->encoding();
629
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
630
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
631
  }
632

633
  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
634
                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
635
                   bool rex_w = false, bool vector256 = false);
636

637
  void simd_prefix(XMMRegister dst, Address src,
638
                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
639
    simd_prefix(dst, xnoreg, src, pre, opc);
640
  }
641

642
  // Memory-destination form: same as the (XMMRegister, Address) overload
  // with the two operands swapped and the default opcode.
  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
    simd_prefix(src, dst, pre, VEX_OPCODE_0F);
  }
645
  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
646
                     VexSimdPrefix pre) {
647
    bool rex_w = true;
648
    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
649
  }
650

651
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
652
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
653
                             bool rex_w = false, bool vector256 = false);
654

655
  // Move/convert 32-bit integer value.
656
  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
657
                             VexSimdPrefix pre) {
658
    // It is OK to cast from Register to XMMRegister to pass argument here
659
    // since only encoding is used in simd_prefix_and_encode() and number of
660
    // Gen and Xmm registers are the same.
661
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
662
  }
663
  // Two-operand form with a general-register source; nds is xnoreg.
  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
    const XMMRegister no_nds = xnoreg;
    return simd_prefix_and_encode(dst, no_nds, src, pre);
  }
666
  int simd_prefix_and_encode(Register dst, XMMRegister src,
667
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
668
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
669
  }
670

671
  // Move/convert 64-bit integer value.
672
  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
673
                               VexSimdPrefix pre) {
674
    bool rex_w = true;
675
    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
676
  }
677
  // Two-operand 64-bit form with a general-register source; nds is xnoreg.
  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
    const XMMRegister no_nds = xnoreg;
    return simd_prefix_and_encode_q(dst, no_nds, src, pre);
  }
680
  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
681
                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
682
    bool rex_w = true;
683
    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
684
  }
685

686
  // Helper functions for groups of instructions
687
  void emit_arith_b(int op1, int op2, Register dst, int imm8);
688

689
  void emit_arith(int op1, int op2, Register dst, int32_t imm32);
690
  // Force generation of a 4 byte immediate value even if it fits into 8bit
691
  void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
692
  void emit_arith(int op1, int op2, Register dst, Register src);
693

694
  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
695
  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
696
  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
697
  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
698
  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
699
                      Address src, VexSimdPrefix pre, bool vector256);
700
  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
701
                      XMMRegister src, VexSimdPrefix pre, bool vector256);
702

703
  void emit_operand(Register reg,
704
                    Register base, Register index, Address::ScaleFactor scale,
705
                    int disp,
706
                    RelocationHolder const& rspec,
707
                    int rip_relative_correction = 0);
708

709
  void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);
710

711
  // operands that only take the original 32bit registers
712
  void emit_operand32(Register reg, Address adr);
713

714
  void emit_operand(XMMRegister reg,
715
                    Register base, Register index, Address::ScaleFactor scale,
716
                    int disp,
717
                    RelocationHolder const& rspec);
718

719
  void emit_operand(XMMRegister reg, Address adr);
720

721
  void emit_operand(MMXRegister reg, Address adr);
722

723
  // workaround gcc (3.2.1-7) bug
724
  void emit_operand(Address adr, MMXRegister reg);
725

726

727
  // Immediate-to-memory forms
728
  void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
729

730
  void emit_farith(int b1, int b2, int i);
731

732

733
 protected:
734
  #ifdef ASSERT
735
  void check_relocation(RelocationHolder const& rspec, int format);
736
  #endif
737

738
  void emit_data(jint data, relocInfo::relocType    rtype, int format);
739
  void emit_data(jint data, RelocationHolder const& rspec, int format);
740
  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
741
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
742

743
  bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
744

745
  // These are all easily abused and hence protected
746

747
  // 32BIT ONLY SECTION
748
#ifndef _LP64
749
  // Make these disappear in 64bit mode since they would never be correct
750
  void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
751
  void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
752

753
  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
754
  void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
755

756
  void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
757
#else
758
  // 64BIT ONLY SECTION
759
  void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
760

761
  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
762
  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
763

764
  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
765
  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
766
#endif // _LP64
767

768
  // These are unique in that we are assured by the caller that the 32bit
769
  // relative in these instructions will always be able to reach the potentially
770
  // 64bit address described by entry. Since they can take a 64bit address they
771
  // don't have the 32 suffix like the other instructions in this class.
772

773
  void call_literal(address entry, RelocationHolder const& rspec);
774
  void jmp_literal(address entry, RelocationHolder const& rspec);
775

776
  // Avoid using directly section
777
  // Instructions in this section are actually usable by anyone without danger
778
  // of failure but have performance issues that are addressed by enhanced
779
  // instructions which will do the proper thing based on the particular cpu.
780
  // We protect them because we don't trust you...
781

782
  // Don't use next inc() and dec() methods directly. INC & DEC instructions
783
  // could cause a partial flag stall since they don't set CF flag.
784
  // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
785
  // which call inc() & dec() or add() & sub() in accordance with
786
  // the product flag UseIncDec value.
787

788
  void decl(Register dst);
789
  void decl(Address dst);
790
  void decq(Register dst);
791
  void decq(Address dst);
792

793
  void incl(Register dst);
794
  void incl(Address dst);
795
  void incq(Register dst);
796
  void incq(Address dst);
797

798
  // New cpus require use of movsd and movss to avoid partial register stall
799
  // when loading from memory. But for old Opteron use movlpd instead of movsd.
800
  // The selection is done in MacroAssembler::movdbl() and movflt().
801

802
  // Move Scalar Single-Precision Floating-Point Values
803
  void movss(XMMRegister dst, Address src);
804
  void movss(XMMRegister dst, XMMRegister src);
805
  void movss(Address dst, XMMRegister src);
806

807
  // Move Scalar Double-Precision Floating-Point Values
808
  void movsd(XMMRegister dst, Address src);
809
  void movsd(XMMRegister dst, XMMRegister src);
810
  void movsd(Address dst, XMMRegister src);
811
  void movlpd(XMMRegister dst, Address src);
812

813
  // New cpus require use of movaps and movapd to avoid partial register stall
814
  // when moving between registers.
815
  void movaps(XMMRegister dst, XMMRegister src);
816
  void movapd(XMMRegister dst, XMMRegister src);
817

818
  // End avoid using directly
819

820

821
  // Instruction prefixes
822
  void prefix(Prefix p);
823

824
  public:
825

826
  // Creation
827
  // Hands the given code buffer to the AbstractAssembler base constructor;
  // the body is empty, so nothing else is initialized here.
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
828

829
  // Decoding
830
  static address locate_operand(address inst, WhichOperand which);
831
  static address locate_next_instruction(address inst);
832

833
  // Utilities
834
  // NOTE(review): the NOT_LP64 body presumably applies only to 32-bit builds
  // (always returning false there), leaving a plain declaration on 64-bit --
  // confirm against the macro definition.
  static bool is_polling_page_far() NOT_LP64({ return false;});
835

836
  // Generic instructions
837
  // Does 32bit or 64bit as needed for the platform. In some sense these
838
  // belong in macro assembler but there is no need for both varieties to exist
839

840
  void lea(Register dst, Address src);
841

842
  void mov(Register dst, Register src);
843

844
  void pusha();
845
  void popa();
846

847
  void pushf();
848
  void popf();
849

850
  void push(int32_t imm32);
851

852
  void push(Register src);
853

854
  void pop(Register dst);
855

856
  // These are dummies to prevent surprise implicit conversions to Register
857
  void push(void* v);
858
  void pop(void* v);
859

860
  // These do register sized moves/scans
861
  void rep_mov();
862
  void rep_stos();
863
  void rep_stosb();
864
  void repne_scan();
865
#ifdef _LP64
866
  void repne_scanl();
867
#endif
868

869
  // Vanilla instructions in lexical order
870

871
  void adcl(Address dst, int32_t imm32);
872
  void adcl(Address dst, Register src);
873
  void adcl(Register dst, int32_t imm32);
874
  void adcl(Register dst, Address src);
875
  void adcl(Register dst, Register src);
876

877
  void adcq(Register dst, int32_t imm32);
878
  void adcq(Register dst, Address src);
879
  void adcq(Register dst, Register src);
880

881
  void addl(Address dst, int32_t imm32);
882
  void addl(Address dst, Register src);
883
  void addl(Register dst, int32_t imm32);
884
  void addl(Register dst, Address src);
885
  void addl(Register dst, Register src);
886

887
  void addq(Address dst, int32_t imm32);
888
  void addq(Address dst, Register src);
889
  void addq(Register dst, int32_t imm32);
890
  void addq(Register dst, Address src);
891
  void addq(Register dst, Register src);
892

893
#ifdef _LP64
894
  // Add Unsigned Integers with Carry Flag
895
  void adcxq(Register dst, Register src);
896

897
  // Add Unsigned Integers with Overflow Flag
898
  void adoxq(Register dst, Register src);
899
#endif
900

901
  void addr_nop_4();
902
  void addr_nop_5();
903
  void addr_nop_7();
904
  void addr_nop_8();
905

906
  // Add Scalar Double-Precision Floating-Point Values
907
  void addsd(XMMRegister dst, Address src);
908
  void addsd(XMMRegister dst, XMMRegister src);
909

910
  // Add Scalar Single-Precision Floating-Point Values
911
  void addss(XMMRegister dst, Address src);
912
  void addss(XMMRegister dst, XMMRegister src);
913

914
  // AES instructions
915
  void aesdec(XMMRegister dst, Address src);
916
  void aesdec(XMMRegister dst, XMMRegister src);
917
  void aesdeclast(XMMRegister dst, Address src);
918
  void aesdeclast(XMMRegister dst, XMMRegister src);
919
  void aesenc(XMMRegister dst, Address src);
920
  void aesenc(XMMRegister dst, XMMRegister src);
921
  void aesenclast(XMMRegister dst, Address src);
922
  void aesenclast(XMMRegister dst, XMMRegister src);
923

924

925
  void andl(Address  dst, int32_t imm32);
926
  void andl(Register dst, int32_t imm32);
927
  void andl(Register dst, Address src);
928
  void andl(Register dst, Register src);
929

930
  void andq(Address  dst, int32_t imm32);
931
  void andq(Register dst, int32_t imm32);
932
  void andq(Register dst, Address src);
933
  void andq(Register dst, Register src);
934

935
  // BMI instructions
936
  void andnl(Register dst, Register src1, Register src2);
937
  void andnl(Register dst, Register src1, Address src2);
938
  void andnq(Register dst, Register src1, Register src2);
939
  void andnq(Register dst, Register src1, Address src2);
940

941
  void blsil(Register dst, Register src);
942
  void blsil(Register dst, Address src);
943
  void blsiq(Register dst, Register src);
944
  void blsiq(Register dst, Address src);
945

946
  void blsmskl(Register dst, Register src);
947
  void blsmskl(Register dst, Address src);
948
  void blsmskq(Register dst, Register src);
949
  void blsmskq(Register dst, Address src);
950

951
  void blsrl(Register dst, Register src);
952
  void blsrl(Register dst, Address src);
953
  void blsrq(Register dst, Register src);
954
  void blsrq(Register dst, Address src);
955

956
  void bsfl(Register dst, Register src);
957
  void bsrl(Register dst, Register src);
958

959
#ifdef _LP64
960
  void bsfq(Register dst, Register src);
961
  void bsrq(Register dst, Register src);
962
#endif
963

964
  void bswapl(Register reg);
965

966
  void bswapq(Register reg);
967

968
  void call(Label& L, relocInfo::relocType rtype);
969
  void call(Register reg);  // push pc; pc <- reg
970
  void call(Address adr);   // push pc; pc <- adr
971

972
  void cdql();
973

974
  void cdqq();
975

976
  void cld();
977

978
  void clflush(Address adr);
979

980
  void cmovl(Condition cc, Register dst, Register src);
981
  void cmovl(Condition cc, Register dst, Address src);
982

983
  void cmovq(Condition cc, Register dst, Register src);
984
  void cmovq(Condition cc, Register dst, Address src);
985

986

987
  void cmpb(Address dst, int imm8);
988

989
  void cmpl(Address dst, int32_t imm32);
990

991
  void cmpl(Register dst, int32_t imm32);
992
  void cmpl(Register dst, Register src);
993
  void cmpl(Register dst, Address src);
994

995
  void cmpq(Address dst, int32_t imm32);
996
  void cmpq(Address dst, Register src);
997

998
  void cmpq(Register dst, int32_t imm32);
999
  void cmpq(Register dst, Register src);
1000
  void cmpq(Register dst, Address src);
1001

1002
  // these are dummies used to catch attempting to convert NULL to Register
1003
  void cmpl(Register dst, void* junk); // dummy
1004
  void cmpq(Register dst, void* junk); // dummy
1005

1006
  void cmpw(Address dst, int imm16);
1007

1008
  void cmpxchg8 (Address adr);
1009

1010
  void cmpxchgl(Register reg, Address adr);
1011

1012
  void cmpxchgq(Register reg, Address adr);
1013

1014
  // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1015
  void comisd(XMMRegister dst, Address src);
1016
  void comisd(XMMRegister dst, XMMRegister src);
1017

1018
  // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1019
  void comiss(XMMRegister dst, Address src);
1020
  void comiss(XMMRegister dst, XMMRegister src);
1021

1022
  // Identify processor type and features
1023
  void cpuid();
1024

1025
  // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1026
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1027
  void cvtsd2ss(XMMRegister dst, Address src);
1028

1029
  // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1030
  void cvtsi2sdl(XMMRegister dst, Register src);
1031
  void cvtsi2sdl(XMMRegister dst, Address src);
1032
  void cvtsi2sdq(XMMRegister dst, Register src);
1033
  void cvtsi2sdq(XMMRegister dst, Address src);
1034

1035
  // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1036
  void cvtsi2ssl(XMMRegister dst, Register src);
1037
  void cvtsi2ssl(XMMRegister dst, Address src);
1038
  void cvtsi2ssq(XMMRegister dst, Register src);
1039
  void cvtsi2ssq(XMMRegister dst, Address src);
1040

1041
  // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1042
  void cvtdq2pd(XMMRegister dst, XMMRegister src);
1043

1044
  // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1045
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
1046

1047
  // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1048
  void cvtss2sd(XMMRegister dst, XMMRegister src);
1049
  void cvtss2sd(XMMRegister dst, Address src);
1050

1051
  // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1052
  void cvttsd2sil(Register dst, Address src);
1053
  void cvttsd2sil(Register dst, XMMRegister src);
1054
  void cvttsd2siq(Register dst, XMMRegister src);
1055

1056
  // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1057
  void cvttss2sil(Register dst, XMMRegister src);
1058
  void cvttss2siq(Register dst, XMMRegister src);
1059

1060
  // Divide Scalar Double-Precision Floating-Point Values
1061
  void divsd(XMMRegister dst, Address src);
1062
  void divsd(XMMRegister dst, XMMRegister src);
1063

1064
  // Divide Scalar Single-Precision Floating-Point Values
1065
  void divss(XMMRegister dst, Address src);
1066
  void divss(XMMRegister dst, XMMRegister src);
1067

1068
  void emms();
1069

1070
  void fabs();
1071

1072
  void fadd(int i);
1073

1074
  void fadd_d(Address src);
1075
  void fadd_s(Address src);
1076

1077
  // "Alternate" versions of x87 instructions place result down in FPU
1078
  // stack instead of on TOS
1079

1080
  void fadda(int i); // "alternate" fadd
1081
  void faddp(int i = 1);
1082

1083
  void fchs();
1084

1085
  void fcom(int i);
1086

1087
  void fcomp(int i = 1);
1088
  void fcomp_d(Address src);
1089
  void fcomp_s(Address src);
1090

1091
  void fcompp();
1092

1093
  void fcos();
1094

1095
  void fdecstp();
1096

1097
  void fdiv(int i);
1098
  void fdiv_d(Address src);
1099
  void fdivr_s(Address src);
1100
  void fdiva(int i);  // "alternate" fdiv
1101
  void fdivp(int i = 1);
1102

1103
  void fdivr(int i);
1104
  void fdivr_d(Address src);
1105
  void fdiv_s(Address src);
1106

1107
  void fdivra(int i); // "alternate" reversed fdiv
1108

1109
  void fdivrp(int i = 1);
1110

1111
  void ffree(int i = 0);
1112

1113
  void fild_d(Address adr);
1114
  void fild_s(Address adr);
1115

1116
  void fincstp();
1117

1118
  void finit();
1119

1120
  void fist_s (Address adr);
1121
  void fistp_d(Address adr);
1122
  void fistp_s(Address adr);
1123

1124
  void fld1();
1125

1126
  void fld_d(Address adr);
1127
  void fld_s(Address adr);
1128
  void fld_s(int index);
1129
  void fld_x(Address adr);  // extended-precision (80-bit) format
1130

1131
  void fldcw(Address src);
1132

1133
  void fldenv(Address src);
1134

1135
  void fldlg2();
1136

1137
  void fldln2();
1138

1139
  void fldz();
1140

1141
  void flog();
1142
  void flog10();
1143

1144
  void fmul(int i);
1145

1146
  void fmul_d(Address src);
1147
  void fmul_s(Address src);
1148

1149
  void fmula(int i);  // "alternate" fmul
1150

1151
  void fmulp(int i = 1);
1152

1153
  void fnsave(Address dst);
1154

1155
  void fnstcw(Address src);
1156

1157
  void fnstsw_ax();
1158

1159
  void fprem();
1160
  void fprem1();
1161

1162
  void frstor(Address src);
1163

1164
  void fsin();
1165

1166
  void fsqrt();
1167

1168
  void fst_d(Address adr);
1169
  void fst_s(Address adr);
1170

1171
  void fstp_d(Address adr);
1172
  void fstp_d(int index);
1173
  void fstp_s(Address adr);
1174
  void fstp_x(Address adr); // extended-precision (80-bit) format
1175

1176
  void fsub(int i);
1177
  void fsub_d(Address src);
1178
  void fsub_s(Address src);
1179

1180
  void fsuba(int i);  // "alternate" fsub
1181

1182
  void fsubp(int i = 1);
1183

1184
  void fsubr(int i);
1185
  void fsubr_d(Address src);
1186
  void fsubr_s(Address src);
1187

1188
  void fsubra(int i); // "alternate" reversed fsub
1189

1190
  void fsubrp(int i = 1);
1191

1192
  void ftan();
1193

1194
  void ftst();
1195

1196
  void fucomi(int i = 1);
1197
  void fucomip(int i = 1);
1198

1199
  void fwait();
1200

1201
  void fxch(int i = 1);
1202

1203
  void fxrstor(Address src);
1204

1205
  void fxsave(Address dst);
1206

1207
  void fyl2x();
1208
  void frndint();
1209
  void f2xm1();
1210
  void fldl2e();
1211

1212
  void hlt();
1213

1214
  void idivl(Register src);
1215
  void divl(Register src); // Unsigned division
1216

1217
#ifdef _LP64
1218
  void idivq(Register src);
1219
#endif
1220

1221
  void imull(Register dst, Register src);
1222
  void imull(Register dst, Register src, int value);
1223
  void imull(Register dst, Address src);
1224

1225
#ifdef _LP64
1226
  void imulq(Register dst, Register src);
1227
  void imulq(Register dst, Register src, int value);
1228
  void imulq(Register dst, Address src);
1229
#endif
1230

1231
  // jcc is the generic conditional branch generator to run-
1232
  // time routines, jcc is used for branches to labels. jcc
1233
  // takes a branch opcode (cc) and a label (L) and generates
1234
  // either a backward branch or a forward branch and links it
1235
  // to the label fixup chain. Usage:
1236
  //
1237
  // Label L;      // unbound label
1238
  // jcc(cc, L);   // forward branch to unbound label
1239
  // bind(L);      // bind label to the current pc
1240
  // jcc(cc, L);   // backward branch to bound label
1241
  // bind(L);      // illegal: a label may be bound only once
1242
  //
1243
  // Note: The same Label can be used for forward and backward branches
1244
  // but it may be bound only once.
1245

1246
  void jcc(Condition cc, Label& L, bool maybe_short = true);
1247

1248
  // Conditional jump to a 8-bit offset to L.
1249
  // WARNING: be very careful using this for forward jumps.  If the label is
1250
  // not bound within an 8-bit offset of this instruction, a run-time error
1251
  // will occur.
1252
  void jccb(Condition cc, Label& L);
1253

1254
  void jmp(Address entry);    // pc <- entry
1255

1256
  // Label operations & relative jumps (PPUM Appendix D)
1257
  void jmp(Label& L, bool maybe_short = true);   // unconditional jump to L
1258

1259
  void jmp(Register entry); // pc <- entry
1260

1261
  // Unconditional 8-bit offset jump to L.
1262
  // WARNING: be very careful using this for forward jumps.  If the label is
1263
  // not bound within an 8-bit offset of this instruction, a run-time error
1264
  // will occur.
1265
  void jmpb(Label& L);
1266

1267
  void ldmxcsr( Address src );
1268

1269
  void leal(Register dst, Address src);
1270

1271
  void leaq(Register dst, Address src);
1272

1273
  void lfence();
1274

1275
  void lock();
1276

1277
  void lzcntl(Register dst, Register src);
1278

1279
#ifdef _LP64
1280
  void lzcntq(Register dst, Register src);
1281
#endif
1282

1283
  // Bit flags describing the ordering constraints that can be passed to
  // membar(). Every constant occupies its own bit, so values can be tested
  // individually with a bitwise AND.
  enum Membar_mask_bits {
    LoadLoad   = 1 << 0,
    StoreLoad  = 1 << 1,
    LoadStore  = 1 << 2,
    StoreStore = 1 << 3
  };
1289

1290
  // Serializes memory and blows flags
1291
  void membar(Membar_mask_bits order_constraint) {
1292
    if (os::is_MP()) {
1293
      // We only have to handle StoreLoad
1294
      if (order_constraint & StoreLoad) {
1295
        // All usable chips support "locked" instructions which suffice
1296
        // as barriers, and are much faster than the alternative of
1297
        // using cpuid instruction. We use here a locked add [esp],0.
1298
        // This is conveniently otherwise a no-op except for blowing
1299
        // flags.
1300
        // Any change to this code may need to revisit other places in
1301
        // the code where this idiom is used, in particular the
1302
        // orderAccess code.
1303
        lock();
1304
        addl(Address(rsp, 0), 0);// Assert the lock# signal here
1305
      }
1306
    }
1307
  }
1308

1309
  void mfence();
1310

1311
  // Moves
1312

1313
  void mov64(Register dst, int64_t imm64);
1314

1315
  void movb(Address dst, Register src);
1316
  void movb(Address dst, int imm8);
1317
  void movb(Register dst, Address src);
1318

1319
  void movdl(XMMRegister dst, Register src);
1320
  void movdl(Register dst, XMMRegister src);
1321
  void movdl(XMMRegister dst, Address src);
1322
  void movdl(Address dst, XMMRegister src);
1323

1324
  // Move Double Quadword
1325
  void movdq(XMMRegister dst, Register src);
1326
  void movdq(Register dst, XMMRegister src);
1327

1328
  // Move Aligned Double Quadword
1329
  void movdqa(XMMRegister dst, XMMRegister src);
1330
  void movdqa(XMMRegister dst, Address src);
1331

1332
  // Move Unaligned Double Quadword
1333
  void movdqu(Address     dst, XMMRegister src);
1334
  void movdqu(XMMRegister dst, Address src);
1335
  void movdqu(XMMRegister dst, XMMRegister src);
1336

1337
  // Move Unaligned 256bit Vector
1338
  void vmovdqu(Address dst, XMMRegister src);
1339
  void vmovdqu(XMMRegister dst, Address src);
1340
  void vmovdqu(XMMRegister dst, XMMRegister src);
1341

1342
  // Move lower 64bit to high 64bit in 128bit register
1343
  void movlhps(XMMRegister dst, XMMRegister src);
1344

1345
  void movl(Register dst, int32_t imm32);
1346
  void movl(Address dst, int32_t imm32);
1347
  void movl(Register dst, Register src);
1348
  void movl(Register dst, Address src);
1349
  void movl(Address dst, Register src);
1350

1351
  // These dummies prevent using movl from converting a zero (like NULL) into Register
1352
  // by giving the compiler two choices it can't resolve
1353

1354
  void movl(Address  dst, void* junk);
1355
  void movl(Register dst, void* junk);
1356

1357
#ifdef _LP64
1358
  void movq(Register dst, Register src);
1359
  void movq(Register dst, Address src);
1360
  void movq(Address  dst, Register src);
1361
#endif
1362

1363
  void movq(Address     dst, MMXRegister src );
1364
  void movq(MMXRegister dst, Address src );
1365

1366
#ifdef _LP64
1367
  // These dummies prevent using movq from converting a zero (like NULL) into Register
1368
  // by giving the compiler two choices it can't resolve
1369

1370
  void movq(Address  dst, void* dummy);
1371
  void movq(Register dst, void* dummy);
1372
#endif
1373

1374
  // Move Quadword
1375
  void movq(Address     dst, XMMRegister src);
1376
  void movq(XMMRegister dst, Address src);
1377

1378
  void movsbl(Register dst, Address src);
1379
  void movsbl(Register dst, Register src);
1380

1381
#ifdef _LP64
1382
  void movsbq(Register dst, Address src);
1383
  void movsbq(Register dst, Register src);
1384

1385
  // Move signed 32bit immediate to 64bit extending sign
1386
  void movslq(Address  dst, int32_t imm64);
1387
  void movslq(Register dst, int32_t imm64);
1388

1389
  void movslq(Register dst, Address src);
1390
  void movslq(Register dst, Register src);
1391
  void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1392
#endif
1393

1394
  void movswl(Register dst, Address src);
1395
  void movswl(Register dst, Register src);
1396

1397
#ifdef _LP64
1398
  void movswq(Register dst, Address src);
1399
  void movswq(Register dst, Register src);
1400
#endif
1401

1402
  void movw(Address dst, int imm16);
1403
  void movw(Register dst, Address src);
1404
  void movw(Address dst, Register src);
1405

1406
  void movzbl(Register dst, Address src);
1407
  void movzbl(Register dst, Register src);
1408

1409
#ifdef _LP64
1410
  void movzbq(Register dst, Address src);
1411
  void movzbq(Register dst, Register src);
1412
#endif
1413

1414
  void movzwl(Register dst, Address src);
1415
  void movzwl(Register dst, Register src);
1416

1417
#ifdef _LP64
1418
  void movzwq(Register dst, Address src);
1419
  void movzwq(Register dst, Register src);
1420
#endif
1421

1422
  // Unsigned multiply with RAX destination register
1423
  void mull(Address src);
1424
  void mull(Register src);
1425

1426
#ifdef _LP64
1427
  void mulq(Address src);
1428
  void mulq(Register src);
1429
  void mulxq(Register dst1, Register dst2, Register src);
1430
#endif
1431

1432
  // Multiply Scalar Double-Precision Floating-Point Values
1433
  void mulsd(XMMRegister dst, Address src);
1434
  void mulsd(XMMRegister dst, XMMRegister src);
1435

1436
  // Multiply Scalar Single-Precision Floating-Point Values
1437
  void mulss(XMMRegister dst, Address src);
1438
  void mulss(XMMRegister dst, XMMRegister src);
1439

1440
  void negl(Register dst);
1441

1442
#ifdef _LP64
1443
  void negq(Register dst);
1444
#endif
1445

1446
  void nop(int i = 1);
1447

1448
  void notl(Register dst);
1449

1450
#ifdef _LP64
1451
  void notq(Register dst);
1452
#endif
1453

1454
  void orl(Address dst, int32_t imm32);
1455
  void orl(Register dst, int32_t imm32);
1456
  void orl(Register dst, Address src);
1457
  void orl(Register dst, Register src);
1458
  void orl(Address dst, Register src);
1459

1460
  void orq(Address dst, int32_t imm32);
1461
  void orq(Register dst, int32_t imm32);
1462
  void orq(Register dst, Address src);
1463
  void orq(Register dst, Register src);
1464

1465
  // Pack with unsigned saturation
1466
  void packuswb(XMMRegister dst, XMMRegister src);
1467
  void packuswb(XMMRegister dst, Address src);
1468
  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1469

1470
  // Permutation of 64bit words
1471
  void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
1472

1473
  void pause();
1474

1475
  // SSE4.2 string instructions
1476
  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1477
  void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1478

1479
  // SSE 4.1 extract
1480
  void pextrd(Register dst, XMMRegister src, int imm8);
1481
  void pextrq(Register dst, XMMRegister src, int imm8);
1482

1483
  // SSE 4.1 insert
1484
  void pinsrd(XMMRegister dst, Register src, int imm8);
1485
  void pinsrq(XMMRegister dst, Register src, int imm8);
1486

1487
  // SSE4.1 packed move
1488
  void pmovzxbw(XMMRegister dst, XMMRegister src);
1489
  void pmovzxbw(XMMRegister dst, Address src);
1490

1491
#ifndef _LP64 // no 32bit push/pop on amd64
1492
  void popl(Address dst);
1493
#endif
1494

1495
#ifdef _LP64
1496
  void popq(Address dst);
1497
#endif
1498

1499
  void popcntl(Register dst, Address src);
1500
  void popcntl(Register dst, Register src);
1501

1502
#ifdef _LP64
1503
  void popcntq(Register dst, Address src);
1504
  void popcntq(Register dst, Register src);
1505
#endif
1506

1507
  // Prefetches (SSE, SSE2, 3DNOW only)
1508

1509
  void prefetchnta(Address src);
1510
  void prefetchr(Address src);
1511
  void prefetcht0(Address src);
1512
  void prefetcht1(Address src);
1513
  void prefetcht2(Address src);
1514
  void prefetchw(Address src);
1515

1516
  // Shuffle Bytes
1517
  void pshufb(XMMRegister dst, XMMRegister src);
1518
  void pshufb(XMMRegister dst, Address src);
1519

1520
  // Shuffle Packed Doublewords
1521
  void pshufd(XMMRegister dst, XMMRegister src, int mode);
1522
  void pshufd(XMMRegister dst, Address src,     int mode);
1523

1524
  // Shuffle Packed Low Words
1525
  void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1526
  void pshuflw(XMMRegister dst, Address src,     int mode);
1527

1528
  // Shift Right by bytes Logical DoubleQuadword Immediate
1529
  void psrldq(XMMRegister dst, int shift);
1530
  // Shift Left by bytes Logical DoubleQuadword Immediate
1531
  void pslldq(XMMRegister dst, int shift);
1532

1533
  // Logical Compare 128bit
1534
  void ptest(XMMRegister dst, XMMRegister src);
1535
  void ptest(XMMRegister dst, Address src);
1536
  // Logical Compare 256bit
1537
  void vptest(XMMRegister dst, XMMRegister src);
1538
  void vptest(XMMRegister dst, Address src);
1539

1540
  // Interleave Low Bytes
1541
  void punpcklbw(XMMRegister dst, XMMRegister src);
1542
  void punpcklbw(XMMRegister dst, Address src);
1543

1544
  // Interleave Low Doublewords
1545
  void punpckldq(XMMRegister dst, XMMRegister src);
1546
  void punpckldq(XMMRegister dst, Address src);
1547

1548
  // Interleave Low Quadwords
1549
  void punpcklqdq(XMMRegister dst, XMMRegister src);
1550

1551
#ifndef _LP64 // no 32bit push/pop on amd64
1552
  void pushl(Address src);
1553
#endif
1554

1555
  void pushq(Address src);
1556

1557
  void rcll(Register dst, int imm8);
1558

1559
  void rclq(Register dst, int imm8);
1560

1561
  void rcrq(Register dst, int imm8);
1562

1563
  void rdtsc();
1564

1565
  void ret(int imm16);
1566

1567
#ifdef _LP64
1568
  // Declared only when _LP64 is defined.
  // rorq takes a single register plus an int count (imm8); rorxq takes
  // separate dst and src registers plus the count.
  // NOTE(review): presumably ROR (rotate right) and the BMI2 RORX form --
  // confirm in the implementation.
  void rorq(Register dst, int imm8);
1569
  void rorxq(Register dst, Register src, int imm8);
1570
#endif
1571

1572
  // No operands.
  // NOTE(review): presumably emits SAHF (store AH into flags) -- confirm.
  void sahf();
1573

1574
  // Each mnemonic has two overloads: one with an explicit int count (imm8)
  // and one taking only the register.
  // NOTE(review): presumably SAR (arithmetic shift right), l = 32-bit,
  // q = 64-bit, and the one-argument form presumably shifts by the count
  // held in CL -- confirm against the implementation.
  void sarl(Register dst, int imm8);
1575
  void sarl(Register dst);
1576

1577
  void sarq(Register dst, int imm8);
1578
  void sarq(Register dst);
1579

1580
  // Overloads cover: memory <- imm32, register <- imm32, register <- memory,
  // register <- register. The same four shapes are declared for sbbl and sbbq.
  // NOTE(review): presumably SBB (subtract with borrow), l = 32-bit,
  // q = 64-bit operand size -- confirm against the implementation.
  void sbbl(Address dst, int32_t imm32);
1581
  void sbbl(Register dst, int32_t imm32);
1582
  void sbbl(Register dst, Address src);
1583
  void sbbl(Register dst, Register src);
1584

1585
  void sbbq(Address dst, int32_t imm32);
1586
  void sbbq(Register dst, int32_t imm32);
1587
  void sbbq(Register dst, Address src);
1588
  void sbbq(Register dst, Register src);
1589

1590
  // Takes a condition code and a destination register.
  // NOTE(review): presumably emits SETcc on the byte form of dst -- confirm
  // which registers are legal as dst in the implementation.
  void setb(Condition cc, Register dst);
1591

1592
  // Two-register form with no explicit count argument.
  // NOTE(review): presumably SHLD (double-precision shift left) with the
  // count taken implicitly (CL) -- confirm against the implementation.
  void shldl(Register dst, Register src);
1593

1594
  // shll/shlq: each has an overload with an explicit int count (imm8) and
  // one taking only the register.
  // NOTE(review): presumably SHL with l = 32-bit, q = 64-bit; the
  // one-argument form presumably shifts by CL -- confirm.
  void shll(Register dst, int imm8);
1595
  void shll(Register dst);
1596

1597
  void shlq(Register dst, int imm8);
1598
  void shlq(Register dst);
1599

1600
  // Two-register form with no explicit count argument.
  // NOTE(review): presumably SHRD (double-precision shift right) with the
  // count taken implicitly (CL) -- confirm against the implementation.
  void shrdl(Register dst, Register src);
1601

1602
  // shrl/shrq: each has an overload with an explicit int count (imm8) and
  // one taking only the register.
  // NOTE(review): presumably SHR (logical shift right) with l = 32-bit,
  // q = 64-bit; the one-argument form presumably shifts by CL -- confirm.
  void shrl(Register dst, int imm8);
1603
  void shrl(Register dst);
1604

1605
  void shrq(Register dst, int imm8);
1606
  void shrq(Register dst);
1607

1608
  // No operands.
  // NOTE(review): presumably a string move of 32-bit elements (MOVS) using
  // implicit registers -- confirm. The trailing "QQQ" marker is an open
  // question left by the original author and is kept as-is.
  void smovl(); // QQQ generic?
1609

1610
  // Compute Square Root of Scalar Double-Precision Floating-Point Value
  // Memory-source and register-source forms.
  void sqrtsd(XMMRegister dst, Address src);
1612
  void sqrtsd(XMMRegister dst, XMMRegister src);
1613

1614
  // Compute Square Root of Scalar Single-Precision Floating-Point Value
  // Memory-source and register-source forms.
  void sqrtss(XMMRegister dst, Address src);
1616
  void sqrtss(XMMRegister dst, XMMRegister src);
1617

1618
  // No operands.
  // NOTE(review): presumably emits STD (set direction flag) -- confirm.
  void std();
1619

1620
  // Takes a memory destination.
  // NOTE(review): presumably emits STMXCSR (store the MXCSR register to
  // memory) -- confirm.
  void stmxcsr( Address dst );
1621

1622
  // Integer subtract. Overloads cover: memory <- imm32, memory <- register,
  // register <- imm32, register <- memory, register <- register. The same
  // five shapes are declared for subl and subq.
  // NOTE(review): l/q presumably select 32-/64-bit operand size -- confirm.
  void subl(Address dst, int32_t imm32);
1623
  void subl(Address dst, Register src);
1624
  void subl(Register dst, int32_t imm32);
1625
  void subl(Register dst, Address src);
1626
  void subl(Register dst, Register src);
1627

1628
  void subq(Address dst, int32_t imm32);
1629
  void subq(Address dst, Register src);
1630
  void subq(Register dst, int32_t imm32);
1631
  void subq(Register dst, Address src);
1632
  void subq(Register dst, Register src);
1633

1634
  // Force generation of a 4 byte immediate value even if it fits into 8bit
  // (register-destination forms only; one per operand-size suffix).
  void subl_imm32(Register dst, int32_t imm32);
1636
  void subq_imm32(Register dst, int32_t imm32);
1637

1638
  // Subtract Scalar Double-Precision Floating-Point Values
  // Memory-source and register-source forms.
  void subsd(XMMRegister dst, Address src);
1640
  void subsd(XMMRegister dst, XMMRegister src);
1641

1642
  // Subtract Scalar Single-Precision Floating-Point Values
  // Memory-source and register-source forms.
  void subss(XMMRegister dst, Address src);
1644
  void subss(XMMRegister dst, XMMRegister src);
1645

1646
  // testb: register or memory operand against an int immediate (imm8).
  // NOTE(review): presumably TEST on a byte operand -- confirm which
  // registers are legal as dst.
  void testb(Register dst, int imm8);
1647
  void testb(Address dst, int imm8);
1648

1649
  // testl: register against imm32, register, or memory operand.
  void testl(Register dst, int32_t imm32);
1650
  void testl(Register dst, Register src);
1651
  void testl(Register dst, Address src);
1652

1653
  // testq: register against imm32 or register only; no Address overload is
  // declared here, unlike testl.
  void testq(Register dst, int32_t imm32);
1654
  void testq(Register dst, Register src);
1655

1656
  // BMI - count trailing zeros
  // Register-to-register forms only; one per operand-size suffix (l / q).
  void tzcntl(Register dst, Register src);
1658
  void tzcntq(Register dst, Register src);
1659

1660
  // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
  // Memory-source and register-source forms.
  void ucomisd(XMMRegister dst, Address src);
1662
  void ucomisd(XMMRegister dst, XMMRegister src);
1663

1664
  // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
  // Memory-source and register-source forms.
  void ucomiss(XMMRegister dst, Address src);
1666
  void ucomiss(XMMRegister dst, XMMRegister src);
1667

1668
  // Takes an 8-bit immediate.
  // NOTE(review): presumably emits XABORT (abort a hardware transaction,
  // with imm8 reported as the abort code) -- confirm.
  void xabort(int8_t imm8);
1669

1670
  // Memory destination, register source; one form per operand-size suffix.
  // NOTE(review): presumably XADD (exchange and add). No lock prefix is
  // implied by these signatures -- confirm whether callers must emit one.
  void xaddl(Address dst, Register src);
1671

1672
  void xaddq(Address dst, Register src);
1673

1674
  // Takes the label 'abort' and an optional relocation type that defaults to
  // relocInfo::none.
  // NOTE(review): presumably emits XBEGIN (start a hardware transaction) with
  // 'abort' as the fallback target -- confirm.
  void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
1675

1676
  // Exchange: register <-> memory and register <-> register forms, declared
  // for both the l and q suffixes.
  // NOTE(review): l/q presumably select 32-/64-bit operand size -- confirm.
  void xchgl(Register reg, Address adr);
1677
  void xchgl(Register dst, Register src);
1678

1679
  void xchgq(Register reg, Address adr);
1680
  void xchgq(Register dst, Register src);
1681

1682
  // No operands.
  // NOTE(review): presumably emits XEND (commit a hardware transaction) --
  // confirm.
  void xend();
1683

1684
  // Get Value of Extended Control Register
  // Takes no explicit operands; any inputs/outputs are implicit registers.
  void xgetbv();
1686

1687
  // Integer XOR into a register destination.
  // xorl: source may be imm32, memory, or register.
  void xorl(Register dst, int32_t imm32);
1688
  void xorl(Register dst, Address src);
1689
  void xorl(Register dst, Register src);
1690

1691
  // xorq: source may be memory or register; no immediate overload is
  // declared here, unlike xorl.
  void xorq(Register dst, Address src);
1692
  void xorq(Register dst, Register src);
1693

1694
  // Helper taking only a destination register; see the trailing comment for
  // the intended result.
  // NOTE(review): "if not zero" presumably refers to the current flags state
  // rather than to the prior value of dst -- confirm in the implementation.
  void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
1695

1696
  // AVX 3-operands scalar instructions (encoded with VEX prefix)
1697
  // Every mnemonic below (add/div/mul/sub, each in sd and ss flavours) is
  // declared twice: once with a memory source and once with a register source.
  // NOTE(review): 'nds' is presumably the first (non-destructive) source
  // operand and 'src' the second -- confirm operand order in the encoder.

1698
  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
1699
  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1700
  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
1701
  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1702
  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
1703
  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1704
  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
1705
  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1706
  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
1707
  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1708
  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
1709
  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1710
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
1711
  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
1712
  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
1713
  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
1714

1715

1716
  //====================VECTOR ARITHMETIC=====================================
1717

1718
  // Add Packed Floating-Point Values
  // Two-operand forms (addpd/addps) are register-register only here; the
  // v-prefixed three-operand forms accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void addpd(XMMRegister dst, XMMRegister src);
1720
  void addps(XMMRegister dst, XMMRegister src);
1721
  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1722
  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1723
  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1724
  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1725

1726
  // Subtract Packed Floating-Point Values
  // Two-operand forms (subpd/subps) are register-register only here; the
  // v-prefixed three-operand forms accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void subpd(XMMRegister dst, XMMRegister src);
1728
  void subps(XMMRegister dst, XMMRegister src);
1729
  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1730
  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1731
  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1732
  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1733

1734
  // Multiply Packed Floating-Point Values
  // Two-operand forms (mulpd/mulps) are register-register only here; the
  // v-prefixed three-operand forms accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void mulpd(XMMRegister dst, XMMRegister src);
1736
  void mulps(XMMRegister dst, XMMRegister src);
1737
  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1738
  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1739
  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1740
  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1741

1742
  // Divide Packed Floating-Point Values
  // Two-operand forms (divpd/divps) are register-register only here; the
  // v-prefixed three-operand forms accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void divpd(XMMRegister dst, XMMRegister src);
1744
  void divps(XMMRegister dst, XMMRegister src);
1745
  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1746
  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1747
  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1748
  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1749

1750
  // Bitwise Logical AND of Packed Floating-Point Values
  // The public two-operand forms are register-register only; the memory-source
  // andpd/andps overloads are declared separately in the protected section at
  // the end of the class.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void andpd(XMMRegister dst, XMMRegister src);
1752
  void andps(XMMRegister dst, XMMRegister src);
1753
  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1754
  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1755
  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1756
  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1757

1758
  // Bitwise Logical XOR of Packed Floating-Point Values
  // The public two-operand forms are register-register only; the memory-source
  // xorpd/xorps overloads are declared separately in the protected section at
  // the end of the class.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void xorpd(XMMRegister dst, XMMRegister src);
1760
  void xorps(XMMRegister dst, XMMRegister src);
1761
  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1762
  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1763
  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1764
  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1765

1766
  // Add packed integers
  // Declared for four element-size suffixes (b, w, d, q). The two-operand
  // forms are register-register only; the v-prefixed three-operand forms
  // accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void paddb(XMMRegister dst, XMMRegister src);
1768
  void paddw(XMMRegister dst, XMMRegister src);
1769
  void paddd(XMMRegister dst, XMMRegister src);
1770
  void paddq(XMMRegister dst, XMMRegister src);
1771
  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1772
  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1773
  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1774
  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1775
  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1776
  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1777
  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1778
  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1779

1780
  // Subtract packed integers
  // Declared for four element-size suffixes (b, w, d, q). The two-operand
  // forms are register-register only; the v-prefixed three-operand forms
  // accept a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void psubb(XMMRegister dst, XMMRegister src);
1782
  void psubw(XMMRegister dst, XMMRegister src);
1783
  void psubd(XMMRegister dst, XMMRegister src);
1784
  void psubq(XMMRegister dst, XMMRegister src);
1785
  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1786
  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1787
  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1788
  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1789
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1790
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1791
  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1792
  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1793

1794
  // Multiply packed integers (only shorts and ints)
  // Declared for the w and d suffixes only. The two-operand forms are
  // register-register only; the v-prefixed three-operand forms accept a
  // register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void pmullw(XMMRegister dst, XMMRegister src);
1796
  void pmulld(XMMRegister dst, XMMRegister src);
1797
  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1798
  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1799
  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1800
  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1801

1802
  // Shift left packed integers
  // Declared for the w, d and q suffixes. The shift count is supplied either
  // as an int or in an XMM register; the two-operand forms shift dst in
  // place, while the v-prefixed forms take a separate src register.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void psllw(XMMRegister dst, int shift);
1804
  void pslld(XMMRegister dst, int shift);
1805
  void psllq(XMMRegister dst, int shift);
1806
  void psllw(XMMRegister dst, XMMRegister shift);
1807
  void pslld(XMMRegister dst, XMMRegister shift);
1808
  void psllq(XMMRegister dst, XMMRegister shift);
1809
  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1810
  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1811
  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1812
  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1813
  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1814
  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1815

1816
  // Logical shift right packed integers
  // Declared for the w, d and q suffixes. The shift count is supplied either
  // as an int or in an XMM register; the two-operand forms shift dst in
  // place, while the v-prefixed forms take a separate src register.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void psrlw(XMMRegister dst, int shift);
1818
  void psrld(XMMRegister dst, int shift);
1819
  void psrlq(XMMRegister dst, int shift);
1820
  void psrlw(XMMRegister dst, XMMRegister shift);
1821
  void psrld(XMMRegister dst, XMMRegister shift);
1822
  void psrlq(XMMRegister dst, XMMRegister shift);
1823
  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1824
  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1825
  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1826
  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1827
  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1828
  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1829

1830
  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
  // Declared for the w and d suffixes only. The shift count is supplied either
  // as an int or in an XMM register; the two-operand forms shift dst in
  // place, while the v-prefixed forms take a separate src register.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void psraw(XMMRegister dst, int shift);
1832
  void psrad(XMMRegister dst, int shift);
1833
  void psraw(XMMRegister dst, XMMRegister shift);
1834
  void psrad(XMMRegister dst, XMMRegister shift);
1835
  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1836
  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
1837
  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1838
  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
1839

1840
  // And packed integers
  // pand is register-register only; vpand accepts a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void pand(XMMRegister dst, XMMRegister src);
1842
  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1843
  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1844

1845
  // Or packed integers
  // por is register-register only; vpor accepts a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void por(XMMRegister dst, XMMRegister src);
1847
  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1848
  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1849

1850
  // Xor packed integers
  // pxor is register-register only; vpxor accepts a register or memory source.
  // NOTE(review): 'vector256' presumably selects the 256-bit encoding when
  // true and the 128-bit encoding when false -- confirm.
  void pxor(XMMRegister dst, XMMRegister src);
1852
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
1853
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
1854

1855
  // Copy low 128bit into high 128bit of YMM registers.
  // Register-only forms taking dst, nds and src.
  // NOTE(review): the f/i infix presumably distinguishes the floating-point
  // (VINSERTF128) and integer (VINSERTI128) encodings -- confirm.
  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1857
  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
1858

1859
  // Load/store high 128bit of YMM registers which does not destroy other half.
  // The vinsert* overloads take a memory source (load); the vextract*
  // overloads take a memory destination (store).
  // NOTE(review): the f/i infix presumably distinguishes the floating-point
  // and integer encodings -- confirm.
  void vinsertf128h(XMMRegister dst, Address src);
1861
  void vinserti128h(XMMRegister dst, Address src);
1862
  void vextractf128h(Address dst, XMMRegister src);
1863
  void vextracti128h(Address dst, XMMRegister src);
1864

1865
  // Duplicate the 4-byte integer in src into 8 locations in dst.
  // Only the register-register form is declared here.
  void vpbroadcastd(XMMRegister dst, XMMRegister src);
1867

1868
  // Carry-Less Multiplication Quadword
  // Two-operand form and a v-prefixed three-operand form; both register-only.
  // NOTE(review): 'mask' is presumably the immediate that selects which
  // quadword of each source operand is multiplied -- confirm.
  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
1870
  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
1871

1872
  // AVX instruction which is used to clear upper 128 bits of YMM registers and
1873
  // to avoid transition penalty between AVX and SSE states. There is no
1874
  // penalty if legacy SSE instructions are encoded using VEX prefix because
1875
  // they always clear upper 128 bits. It should be used before calling
1876
  // runtime code and native libraries.
1877
  void vzeroupper();
1878

1879
 protected:
1880
  // The following instructions require 16-byte address alignment in SSE mode.
1881
  // They should be called only from the corresponding MacroAssembler instructions,
  // which is why these memory-source overloads sit in the protected section.
1882
  void andpd(XMMRegister dst, Address src);
1883
  void andps(XMMRegister dst, Address src);
1884
  void xorpd(XMMRegister dst, Address src);
1885
  void xorps(XMMRegister dst, Address src);
1886

1887
};
1888

1889
#endif // CPU_X86_VM_ASSEMBLER_X86_HPP
1890

1891
Product

Resources

Company