// Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/cpu/x86/vm/assembler_x86.hpp
// 32285 views
/*1* Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#ifndef CPU_X86_VM_ASSEMBLER_X86_HPP25#define CPU_X86_VM_ASSEMBLER_X86_HPP2627#include "asm/register.hpp"2829class BiasedLockingCounters;3031// Contains all the definitions needed for x86 assembly code generation.3233// Calling convention34class Argument VALUE_OBJ_CLASS_SPEC {35public:36enum {37#ifdef _LP6438#ifdef _WIN6439n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)40n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )41#else42n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)43n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... 
)44#endif // _WIN6445n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...46n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...47#else48n_register_parameters = 0 // 0 registers used to pass arguments49#endif // _LP6450};51};525354#ifdef _LP6455// Symbolically name the register arguments used by the c calling convention.56// Windows is different from linux/solaris. So much for standards...5758#ifdef _WIN645960REGISTER_DECLARATION(Register, c_rarg0, rcx);61REGISTER_DECLARATION(Register, c_rarg1, rdx);62REGISTER_DECLARATION(Register, c_rarg2, r8);63REGISTER_DECLARATION(Register, c_rarg3, r9);6465REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);66REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);67REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);68REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);6970#else7172REGISTER_DECLARATION(Register, c_rarg0, rdi);73REGISTER_DECLARATION(Register, c_rarg1, rsi);74REGISTER_DECLARATION(Register, c_rarg2, rdx);75REGISTER_DECLARATION(Register, c_rarg3, rcx);76REGISTER_DECLARATION(Register, c_rarg4, r8);77REGISTER_DECLARATION(Register, c_rarg5, r9);7879REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);80REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);81REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);82REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);83REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);84REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);85REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);86REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);8788#endif // _WIN648990// Symbolically name the register arguments used by the Java calling convention.91// We have control over the convention for java so we can do what we please.92// What pleases us is to offset the java calling convention so that when93// we call a suitable jni method the arguments are lined up and we don't94// have to do little shuffling. 
A suitable jni method is non-static and a95// small number of arguments (two fewer args on windows)96//97// |-------------------------------------------------------|98// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |99// |-------------------------------------------------------|100// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)101// | rdi rsi rdx rcx r8 r9 | solaris/linux102// |-------------------------------------------------------|103// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |104// |-------------------------------------------------------|105106REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);107REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);108REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);109// Windows runs out of register args here110#ifdef _WIN64111REGISTER_DECLARATION(Register, j_rarg3, rdi);112REGISTER_DECLARATION(Register, j_rarg4, rsi);113#else114REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);115REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);116#endif /* _WIN64 */117REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);118119REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);120REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);121REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);122REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);123REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);124REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);125REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);126REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);127128REGISTER_DECLARATION(Register, rscratch1, r10); // volatile129REGISTER_DECLARATION(Register, rscratch2, r11); // volatile130131REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved132REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved133134#else135// rscratch1 will apear in 32bit code that is dead but of course must compile136// Using noreg ensures if the dead code is incorrectly live and executed it137// will cause an assertion failure138#define rscratch1 noreg139#define 
rscratch2 noreg140141#endif // _LP64142143// JSR 292144// On x86, the SP does not have to be saved when invoking method handle intrinsics145// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.146REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);147148// Address is an abstraction used to represent a memory location149// using any of the amd64 addressing modes with one object.150//151// Note: A register location is represented via a Register, not152// via an address for efficiency & simplicity reasons.153154class ArrayAddress;155156class Address VALUE_OBJ_CLASS_SPEC {157public:158enum ScaleFactor {159no_scale = -1,160times_1 = 0,161times_2 = 1,162times_4 = 2,163times_8 = 3,164times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)165};166static ScaleFactor times(int size) {167assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");168if (size == 8) return times_8;169if (size == 4) return times_4;170if (size == 2) return times_2;171return times_1;172}173static int scale_size(ScaleFactor scale) {174assert(scale != no_scale, "");175assert(((1 << (int)times_1) == 1 &&176(1 << (int)times_2) == 2 &&177(1 << (int)times_4) == 4 &&178(1 << (int)times_8) == 8), "");179return (1 << (int)scale);180}181182private:183Register _base;184Register _index;185ScaleFactor _scale;186int _disp;187RelocationHolder _rspec;188189// Easily misused constructors make them private190// %%% can we make these go away?191NOT_LP64(Address(address loc, RelocationHolder spec);)192Address(int disp, address loc, relocInfo::relocType rtype);193Address(int disp, address loc, RelocationHolder spec);194195public:196197int disp() { return _disp; }198// creation199Address()200: _base(noreg),201_index(noreg),202_scale(no_scale),203_disp(0) {204}205206// No default displacement otherwise Register can be implicitly207// converted to 0(Register) which is quite a different animal.208209Address(Register base, int disp)210: 
_base(base),211_index(noreg),212_scale(no_scale),213_disp(disp) {214}215216Address(Register base, Register index, ScaleFactor scale, int disp = 0)217: _base (base),218_index(index),219_scale(scale),220_disp (disp) {221assert(!index->is_valid() == (scale == Address::no_scale),222"inconsistent address");223}224225Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)226: _base (base),227_index(index.register_or_noreg()),228_scale(scale),229_disp (disp + (index.constant_or_zero() * scale_size(scale))) {230if (!index.is_register()) scale = Address::no_scale;231assert(!_index->is_valid() == (scale == Address::no_scale),232"inconsistent address");233}234235Address plus_disp(int disp) const {236Address a = (*this);237a._disp += disp;238return a;239}240Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {241Address a = (*this);242a._disp += disp.constant_or_zero() * scale_size(scale);243if (disp.is_register()) {244assert(!a.index()->is_valid(), "competing indexes");245a._index = disp.as_register();246a._scale = scale;247}248return a;249}250bool is_same_address(Address a) const {251// disregard _rspec252return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;253}254255// The following two overloads are used in connection with the256// ByteSize type (see sizes.hpp). They simplify the use of257// ByteSize'd arguments in assembly code. 
Note that their equivalent258// for the optimized build are the member functions with int disp259// argument since ByteSize is mapped to an int type in that case.260//261// Note: DO NOT introduce similar overloaded functions for WordSize262// arguments as in the optimized mode, both ByteSize and WordSize263// are mapped to the same type and thus the compiler cannot make a264// distinction anymore (=> compiler errors).265266#ifdef ASSERT267Address(Register base, ByteSize disp)268: _base(base),269_index(noreg),270_scale(no_scale),271_disp(in_bytes(disp)) {272}273274Address(Register base, Register index, ScaleFactor scale, ByteSize disp)275: _base(base),276_index(index),277_scale(scale),278_disp(in_bytes(disp)) {279assert(!index->is_valid() == (scale == Address::no_scale),280"inconsistent address");281}282283Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)284: _base (base),285_index(index.register_or_noreg()),286_scale(scale),287_disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {288if (!index.is_register()) scale = Address::no_scale;289assert(!_index->is_valid() == (scale == Address::no_scale),290"inconsistent address");291}292293#endif // ASSERT294295// accessors296bool uses(Register reg) const { return _base == reg || _index == reg; }297Register base() const { return _base; }298Register index() const { return _index; }299ScaleFactor scale() const { return _scale; }300int disp() const { return _disp; }301302// Convert the raw encoding form into the form expected by the constructor for303// Address. 
An index of 4 (rsp) corresponds to having no index, so convert304// that to noreg for the Address constructor.305static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);306307static Address make_array(ArrayAddress);308309private:310bool base_needs_rex() const {311return _base != noreg && _base->encoding() >= 8;312}313314bool index_needs_rex() const {315return _index != noreg &&_index->encoding() >= 8;316}317318relocInfo::relocType reloc() const { return _rspec.type(); }319320friend class Assembler;321friend class MacroAssembler;322friend class LIR_Assembler; // base/index/scale/disp323};324325//326// AddressLiteral has been split out from Address because operands of this type327// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out328// the few instructions that need to deal with address literals are unique and the329// MacroAssembler does not have to implement every instruction in the Assembler330// in order to search for address literals that may need special handling depending331// on the instruction and the platform. As small step on the way to merging i486/amd64332// directories.333//334class AddressLiteral VALUE_OBJ_CLASS_SPEC {335friend class ArrayAddress;336RelocationHolder _rspec;337// Typically we use AddressLiterals we want to use their rval338// However in some situations we want the lval (effect address) of the item.339// We provide a special factory for making those lvals.340bool _is_lval;341342// If the target is far we'll need to load the ea of this to343// a register to reach it. 
Otherwise if near we can do rip344// relative addressing.345346address _target;347348protected:349// creation350AddressLiteral()351: _is_lval(false),352_target(NULL)353{}354355public:356357358AddressLiteral(address target, relocInfo::relocType rtype);359360AddressLiteral(address target, RelocationHolder const& rspec)361: _rspec(rspec),362_is_lval(false),363_target(target)364{}365366AddressLiteral addr() {367AddressLiteral ret = *this;368ret._is_lval = true;369return ret;370}371372373private:374375address target() { return _target; }376bool is_lval() { return _is_lval; }377378relocInfo::relocType reloc() const { return _rspec.type(); }379const RelocationHolder& rspec() const { return _rspec; }380381friend class Assembler;382friend class MacroAssembler;383friend class Address;384friend class LIR_Assembler;385};386387// Convience classes388class RuntimeAddress: public AddressLiteral {389390public:391392RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}393394};395396class ExternalAddress: public AddressLiteral {397private:398static relocInfo::relocType reloc_for_target(address target) {399// Sometimes ExternalAddress is used for values which aren't400// exactly addresses, like the card table base.401// external_word_type can't be used for values in the first page402// so just skip the reloc in that case.403return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;404}405406public:407408ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}409410};411412class InternalAddress: public AddressLiteral {413414public:415416InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}417418};419420// x86 can do array addressing as a single operation since disp can be an absolute421// address amd64 can't. 
We create a class that expresses the concept but does extra422// magic on amd64 to get the final result423424class ArrayAddress VALUE_OBJ_CLASS_SPEC {425private:426427AddressLiteral _base;428Address _index;429430public:431432ArrayAddress() {};433ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};434AddressLiteral base() { return _base; }435Address index() { return _index; }436437};438439const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);440441// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction442// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write443// is what you get. The Assembler is generating code into a CodeBuffer.444445class Assembler : public AbstractAssembler {446friend class AbstractAssembler; // for the non-virtual hack447friend class LIR_Assembler; // as_Address()448friend class StubGenerator;449450public:451enum Condition { // The x86 condition codes used for conditional jumps/moves.452zero = 0x4,453notZero = 0x5,454equal = 0x4,455notEqual = 0x5,456less = 0xc,457lessEqual = 0xe,458greater = 0xf,459greaterEqual = 0xd,460below = 0x2,461belowEqual = 0x6,462above = 0x7,463aboveEqual = 0x3,464overflow = 0x0,465noOverflow = 0x1,466carrySet = 0x2,467carryClear = 0x3,468negative = 0x8,469positive = 0x9,470parity = 0xa,471noParity = 0xb472};473474enum Prefix {475// segment overrides476CS_segment = 0x2e,477SS_segment = 0x36,478DS_segment = 0x3e,479ES_segment = 0x26,480FS_segment = 0x64,481GS_segment = 0x65,482483REX = 0x40,484485REX_B = 0x41,486REX_X = 0x42,487REX_XB = 0x43,488REX_R = 0x44,489REX_RB = 0x45,490REX_RX = 0x46,491REX_RXB = 0x47,492493REX_W = 0x48,494495REX_WB = 0x49,496REX_WX = 0x4A,497REX_WXB = 0x4B,498REX_WR = 0x4C,499REX_WRB = 0x4D,500REX_WRX = 0x4E,501REX_WRXB = 0x4F,502503VEX_3bytes = 0xC4,504VEX_2bytes = 0xC5505};506507enum VexPrefix {508VEX_B = 0x20,509VEX_X = 0x40,510VEX_R = 0x80,511VEX_W = 0x80512};513514enum 
VexSimdPrefix {515VEX_SIMD_NONE = 0x0,516VEX_SIMD_66 = 0x1,517VEX_SIMD_F3 = 0x2,518VEX_SIMD_F2 = 0x3519};520521enum VexOpcode {522VEX_OPCODE_NONE = 0x0,523VEX_OPCODE_0F = 0x1,524VEX_OPCODE_0F_38 = 0x2,525VEX_OPCODE_0F_3A = 0x3526};527528enum WhichOperand {529// input to locate_operand, and format code for relocations530imm_operand = 0, // embedded 32-bit|64-bit immediate operand531disp32_operand = 1, // embedded 32-bit displacement or address532call32_operand = 2, // embedded 32-bit self-relative displacement533#ifndef _LP64534_WhichOperand_limit = 3535#else536narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop537_WhichOperand_limit = 4538#endif539};540541542543// NOTE: The general philopsophy of the declarations here is that 64bit versions544// of instructions are freely declared without the need for wrapping them an ifdef.545// (Some dangerous instructions are ifdef's out of inappropriate jvm's.)546// In the .cpp file the implementations are wrapped so that they are dropped out547// of the resulting jvm. This is done mostly to keep the footprint of MINIMAL548// to the size it was prior to merging up the 32bit and 64bit assemblers.549//550// This does mean you'll get a linker/runtime error if you use a 64bit only instruction551// in a 32bit vm. 
This is somewhat unfortunate but keeps the ifdef noise down.552553private:554555556// 64bit prefixes557int prefix_and_encode(int reg_enc, bool byteinst = false);558int prefixq_and_encode(int reg_enc);559560int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);561int prefixq_and_encode(int dst_enc, int src_enc);562563void prefix(Register reg);564void prefix(Address adr);565void prefixq(Address adr);566567void prefix(Address adr, Register reg, bool byteinst = false);568void prefix(Address adr, XMMRegister reg);569void prefixq(Address adr, Register reg);570void prefixq(Address adr, XMMRegister reg);571572void prefetch_prefix(Address src);573574void rex_prefix(Address adr, XMMRegister xreg,575VexSimdPrefix pre, VexOpcode opc, bool rex_w);576int rex_prefix_and_encode(int dst_enc, int src_enc,577VexSimdPrefix pre, VexOpcode opc, bool rex_w);578579void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,580int nds_enc, VexSimdPrefix pre, VexOpcode opc,581bool vector256);582583void vex_prefix(Address adr, int nds_enc, int xreg_enc,584VexSimdPrefix pre, VexOpcode opc,585bool vex_w, bool vector256);586587void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,588VexSimdPrefix pre, bool vector256 = false) {589int dst_enc = dst->encoding();590int nds_enc = nds->is_valid() ? 
nds->encoding() : 0;591vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);592}593594void vex_prefix_0F38(Register dst, Register nds, Address src) {595bool vex_w = false;596bool vector256 = false;597vex_prefix(src, nds->encoding(), dst->encoding(),598VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);599}600601void vex_prefix_0F38_q(Register dst, Register nds, Address src) {602bool vex_w = true;603bool vector256 = false;604vex_prefix(src, nds->encoding(), dst->encoding(),605VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);606}607int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,608VexSimdPrefix pre, VexOpcode opc,609bool vex_w, bool vector256);610611int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {612bool vex_w = false;613bool vector256 = false;614return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),615VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);616}617int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {618bool vex_w = true;619bool vector256 = false;620return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),621VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);622}623int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,624VexSimdPrefix pre, bool vector256 = false,625VexOpcode opc = VEX_OPCODE_0F) {626int src_enc = src->encoding();627int dst_enc = dst->encoding();628int nds_enc = nds->is_valid() ? 
nds->encoding() : 0;629return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);630}631632void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,633VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,634bool rex_w = false, bool vector256 = false);635636void simd_prefix(XMMRegister dst, Address src,637VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {638simd_prefix(dst, xnoreg, src, pre, opc);639}640641void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {642simd_prefix(src, dst, pre);643}644void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,645VexSimdPrefix pre) {646bool rex_w = true;647simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);648}649650int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,651VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,652bool rex_w = false, bool vector256 = false);653654// Move/convert 32-bit integer value.655int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,656VexSimdPrefix pre) {657// It is OK to cast from Register to XMMRegister to pass argument here658// since only encoding is used in simd_prefix_and_encode() and number of659// Gen and Xmm registers are the same.660return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);661}662int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {663return simd_prefix_and_encode(dst, xnoreg, src, pre);664}665int simd_prefix_and_encode(Register dst, XMMRegister src,666VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {667return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);668}669670// Move/convert 64-bit integer value.671int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,672VexSimdPrefix pre) {673bool rex_w = true;674return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);675}676int simd_prefix_and_encode_q(XMMRegister dst, Register src, 
VexSimdPrefix pre) {677return simd_prefix_and_encode_q(dst, xnoreg, src, pre);678}679int simd_prefix_and_encode_q(Register dst, XMMRegister src,680VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {681bool rex_w = true;682return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);683}684685// Helper functions for groups of instructions686void emit_arith_b(int op1, int op2, Register dst, int imm8);687688void emit_arith(int op1, int op2, Register dst, int32_t imm32);689// Force generation of a 4 byte immediate value even if it fits into 8bit690void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);691void emit_arith(int op1, int op2, Register dst, Register src);692693void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);694void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);695void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);696void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);697void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,698Address src, VexSimdPrefix pre, bool vector256);699void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,700XMMRegister src, VexSimdPrefix pre, bool vector256);701702void emit_operand(Register reg,703Register base, Register index, Address::ScaleFactor scale,704int disp,705RelocationHolder const& rspec,706int rip_relative_correction = 0);707708void emit_operand(Register reg, Address adr, int rip_relative_correction = 0);709710// operands that only take the original 32bit registers711void emit_operand32(Register reg, Address adr);712713void emit_operand(XMMRegister reg,714Register base, Register index, Address::ScaleFactor scale,715int disp,716RelocationHolder const& rspec);717718void emit_operand(XMMRegister reg, Address adr);719720void emit_operand(MMXRegister reg, Address adr);721722// workaround gcc (3.2.1-7) bug723void 
emit_operand(Address adr, MMXRegister reg);724725726// Immediate-to-memory forms727void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);728729void emit_farith(int b1, int b2, int i);730731732protected:733#ifdef ASSERT734void check_relocation(RelocationHolder const& rspec, int format);735#endif736737void emit_data(jint data, relocInfo::relocType rtype, int format);738void emit_data(jint data, RelocationHolder const& rspec, int format);739void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);740void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);741742bool reachable(AddressLiteral adr) NOT_LP64({ return true;});743744// These are all easily abused and hence protected745746// 32BIT ONLY SECTION747#ifndef _LP64748// Make these disappear in 64bit mode since they would never be correct749void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY750void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY751752void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY753void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY754755void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY756#else757// 64BIT ONLY SECTION758void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY759760void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);761void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);762763void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);764void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);765#endif // _LP64766767// These are unique in that we are ensured by the caller that the 32bit768// relative in these instructions will always be able to reach the potentially769// 64bit address described 
by entry. Since they can take a 64bit address they770// don't have the 32 suffix like the other instructions in this class.771772void call_literal(address entry, RelocationHolder const& rspec);773void jmp_literal(address entry, RelocationHolder const& rspec);774775// Avoid using directly section776// Instructions in this section are actually usable by anyone without danger777// of failure but have performance issues that are addressed my enhanced778// instructions which will do the proper thing base on the particular cpu.779// We protect them because we don't trust you...780781// Don't use next inc() and dec() methods directly. INC & DEC instructions782// could cause a partial flag stall since they don't set CF flag.783// Use MacroAssembler::decrement() & MacroAssembler::increment() methods784// which call inc() & dec() or add() & sub() in accordance with785// the product flag UseIncDec value.786787void decl(Register dst);788void decl(Address dst);789void decq(Register dst);790void decq(Address dst);791792void incl(Register dst);793void incl(Address dst);794void incq(Register dst);795void incq(Address dst);796797// New cpus require use of movsd and movss to avoid partial register stall798// when loading from memory. 
But for old Opteron use movlpd instead of movsd.799// The selection is done in MacroAssembler::movdbl() and movflt().800801// Move Scalar Single-Precision Floating-Point Values802void movss(XMMRegister dst, Address src);803void movss(XMMRegister dst, XMMRegister src);804void movss(Address dst, XMMRegister src);805806// Move Scalar Double-Precision Floating-Point Values807void movsd(XMMRegister dst, Address src);808void movsd(XMMRegister dst, XMMRegister src);809void movsd(Address dst, XMMRegister src);810void movlpd(XMMRegister dst, Address src);811812// New cpus require use of movaps and movapd to avoid partial register stall813// when moving between registers.814void movaps(XMMRegister dst, XMMRegister src);815void movapd(XMMRegister dst, XMMRegister src);816817// End avoid using directly818819820// Instruction prefixes821void prefix(Prefix p);822823public:824825// Creation826Assembler(CodeBuffer* code) : AbstractAssembler(code) {}827828// Decoding829static address locate_operand(address inst, WhichOperand which);830static address locate_next_instruction(address inst);831832// Utilities833static bool is_polling_page_far() NOT_LP64({ return false;});834835// Generic instructions836// Does 32bit or 64bit as needed for the platform. 
In some sense these837// belong in macro assembler but there is no need for both varieties to exist838839void lea(Register dst, Address src);840841void mov(Register dst, Register src);842843void pusha();844void popa();845846void pushf();847void popf();848849void push(int32_t imm32);850851void push(Register src);852853void pop(Register dst);854855// These are dummies to prevent surprise implicit conversions to Register856void push(void* v);857void pop(void* v);858859// These do register sized moves/scans860void rep_mov();861void rep_stos();862void rep_stosb();863void repne_scan();864#ifdef _LP64865void repne_scanl();866#endif867868// Vanilla instructions in lexical order869870void adcl(Address dst, int32_t imm32);871void adcl(Address dst, Register src);872void adcl(Register dst, int32_t imm32);873void adcl(Register dst, Address src);874void adcl(Register dst, Register src);875876void adcq(Register dst, int32_t imm32);877void adcq(Register dst, Address src);878void adcq(Register dst, Register src);879880void addl(Address dst, int32_t imm32);881void addl(Address dst, Register src);882void addl(Register dst, int32_t imm32);883void addl(Register dst, Address src);884void addl(Register dst, Register src);885886void addq(Address dst, int32_t imm32);887void addq(Address dst, Register src);888void addq(Register dst, int32_t imm32);889void addq(Register dst, Address src);890void addq(Register dst, Register src);891892#ifdef _LP64893//Add Unsigned Integers with Carry Flag894void adcxq(Register dst, Register src);895896//Add Unsigned Integers with Overflow Flag897void adoxq(Register dst, Register src);898#endif899900void addr_nop_4();901void addr_nop_5();902void addr_nop_7();903void addr_nop_8();904905// Add Scalar Double-Precision Floating-Point Values906void addsd(XMMRegister dst, Address src);907void addsd(XMMRegister dst, XMMRegister src);908909// Add Scalar Single-Precision Floating-Point Values910void addss(XMMRegister dst, Address src);911void addss(XMMRegister dst, 
XMMRegister src);912913// AES instructions914void aesdec(XMMRegister dst, Address src);915void aesdec(XMMRegister dst, XMMRegister src);916void aesdeclast(XMMRegister dst, Address src);917void aesdeclast(XMMRegister dst, XMMRegister src);918void aesenc(XMMRegister dst, Address src);919void aesenc(XMMRegister dst, XMMRegister src);920void aesenclast(XMMRegister dst, Address src);921void aesenclast(XMMRegister dst, XMMRegister src);922923924void andl(Address dst, int32_t imm32);925void andl(Register dst, int32_t imm32);926void andl(Register dst, Address src);927void andl(Register dst, Register src);928929void andq(Address dst, int32_t imm32);930void andq(Register dst, int32_t imm32);931void andq(Register dst, Address src);932void andq(Register dst, Register src);933934// BMI instructions935void andnl(Register dst, Register src1, Register src2);936void andnl(Register dst, Register src1, Address src2);937void andnq(Register dst, Register src1, Register src2);938void andnq(Register dst, Register src1, Address src2);939940void blsil(Register dst, Register src);941void blsil(Register dst, Address src);942void blsiq(Register dst, Register src);943void blsiq(Register dst, Address src);944945void blsmskl(Register dst, Register src);946void blsmskl(Register dst, Address src);947void blsmskq(Register dst, Register src);948void blsmskq(Register dst, Address src);949950void blsrl(Register dst, Register src);951void blsrl(Register dst, Address src);952void blsrq(Register dst, Register src);953void blsrq(Register dst, Address src);954955void bsfl(Register dst, Register src);956void bsrl(Register dst, Register src);957958#ifdef _LP64959void bsfq(Register dst, Register src);960void bsrq(Register dst, Register src);961#endif962963void bswapl(Register reg);964965void bswapq(Register reg);966967void call(Label& L, relocInfo::relocType rtype);968void call(Register reg); // push pc; pc <- reg969void call(Address adr); // push pc; pc <- adr970971void cdql();972973void cdqq();974975void 
cld();976977void clflush(Address adr);978979void cmovl(Condition cc, Register dst, Register src);980void cmovl(Condition cc, Register dst, Address src);981982void cmovq(Condition cc, Register dst, Register src);983void cmovq(Condition cc, Register dst, Address src);984985986void cmpb(Address dst, int imm8);987988void cmpl(Address dst, int32_t imm32);989990void cmpl(Register dst, int32_t imm32);991void cmpl(Register dst, Register src);992void cmpl(Register dst, Address src);993994void cmpq(Address dst, int32_t imm32);995void cmpq(Address dst, Register src);996997void cmpq(Register dst, int32_t imm32);998void cmpq(Register dst, Register src);999void cmpq(Register dst, Address src);10001001// these are dummies used to catch attempting to convert NULL to Register1002void cmpl(Register dst, void* junk); // dummy1003void cmpq(Register dst, void* junk); // dummy10041005void cmpw(Address dst, int imm16);10061007void cmpxchg8 (Address adr);10081009void cmpxchgl(Register reg, Address adr);10101011void cmpxchgq(Register reg, Address adr);10121013// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS1014void comisd(XMMRegister dst, Address src);1015void comisd(XMMRegister dst, XMMRegister src);10161017// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS1018void comiss(XMMRegister dst, Address src);1019void comiss(XMMRegister dst, XMMRegister src);10201021// Identify processor type and features1022void cpuid();10231024// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value1025void cvtsd2ss(XMMRegister dst, XMMRegister src);1026void cvtsd2ss(XMMRegister dst, Address src);10271028// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value1029void cvtsi2sdl(XMMRegister dst, Register src);1030void cvtsi2sdl(XMMRegister dst, Address src);1031void cvtsi2sdq(XMMRegister dst, Register src);1032void cvtsi2sdq(XMMRegister dst, Address src);10331034// Convert Doubleword 
Integer to Scalar Single-Precision Floating-Point Value1035void cvtsi2ssl(XMMRegister dst, Register src);1036void cvtsi2ssl(XMMRegister dst, Address src);1037void cvtsi2ssq(XMMRegister dst, Register src);1038void cvtsi2ssq(XMMRegister dst, Address src);10391040// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value1041void cvtdq2pd(XMMRegister dst, XMMRegister src);10421043// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value1044void cvtdq2ps(XMMRegister dst, XMMRegister src);10451046// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value1047void cvtss2sd(XMMRegister dst, XMMRegister src);1048void cvtss2sd(XMMRegister dst, Address src);10491050// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer1051void cvttsd2sil(Register dst, Address src);1052void cvttsd2sil(Register dst, XMMRegister src);1053void cvttsd2siq(Register dst, XMMRegister src);10541055// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer1056void cvttss2sil(Register dst, XMMRegister src);1057void cvttss2siq(Register dst, XMMRegister src);10581059// Divide Scalar Double-Precision Floating-Point Values1060void divsd(XMMRegister dst, Address src);1061void divsd(XMMRegister dst, XMMRegister src);10621063// Divide Scalar Single-Precision Floating-Point Values1064void divss(XMMRegister dst, Address src);1065void divss(XMMRegister dst, XMMRegister src);10661067void emms();10681069void fabs();10701071void fadd(int i);10721073void fadd_d(Address src);1074void fadd_s(Address src);10751076// "Alternate" versions of x87 instructions place result down in FPU1077// stack instead of on TOS10781079void fadda(int i); // "alternate" fadd1080void faddp(int i = 1);10811082void fchs();10831084void fcom(int i);10851086void fcomp(int i = 1);1087void fcomp_d(Address src);1088void fcomp_s(Address src);10891090void 
fcompp();10911092void fcos();10931094void fdecstp();10951096void fdiv(int i);1097void fdiv_d(Address src);1098void fdivr_s(Address src);1099void fdiva(int i); // "alternate" fdiv1100void fdivp(int i = 1);11011102void fdivr(int i);1103void fdivr_d(Address src);1104void fdiv_s(Address src);11051106void fdivra(int i); // "alternate" reversed fdiv11071108void fdivrp(int i = 1);11091110void ffree(int i = 0);11111112void fild_d(Address adr);1113void fild_s(Address adr);11141115void fincstp();11161117void finit();11181119void fist_s (Address adr);1120void fistp_d(Address adr);1121void fistp_s(Address adr);11221123void fld1();11241125void fld_d(Address adr);1126void fld_s(Address adr);1127void fld_s(int index);1128void fld_x(Address adr); // extended-precision (80-bit) format11291130void fldcw(Address src);11311132void fldenv(Address src);11331134void fldlg2();11351136void fldln2();11371138void fldz();11391140void flog();1141void flog10();11421143void fmul(int i);11441145void fmul_d(Address src);1146void fmul_s(Address src);11471148void fmula(int i); // "alternate" fmul11491150void fmulp(int i = 1);11511152void fnsave(Address dst);11531154void fnstcw(Address src);11551156void fnstsw_ax();11571158void fprem();1159void fprem1();11601161void frstor(Address src);11621163void fsin();11641165void fsqrt();11661167void fst_d(Address adr);1168void fst_s(Address adr);11691170void fstp_d(Address adr);1171void fstp_d(int index);1172void fstp_s(Address adr);1173void fstp_x(Address adr); // extended-precision (80-bit) format11741175void fsub(int i);1176void fsub_d(Address src);1177void fsub_s(Address src);11781179void fsuba(int i); // "alternate" fsub11801181void fsubp(int i = 1);11821183void fsubr(int i);1184void fsubr_d(Address src);1185void fsubr_s(Address src);11861187void fsubra(int i); // "alternate" reversed fsub11881189void fsubrp(int i = 1);11901191void ftan();11921193void ftst();11941195void fucomi(int i = 1);1196void fucomip(int i = 1);11971198void fwait();11991200void 
fxch(int i = 1);12011202void fxrstor(Address src);12031204void fxsave(Address dst);12051206void fyl2x();1207void frndint();1208void f2xm1();1209void fldl2e();12101211void hlt();12121213void idivl(Register src);1214void divl(Register src); // Unsigned division12151216#ifdef _LP641217void idivq(Register src);1218#endif12191220void imull(Register dst, Register src);1221void imull(Register dst, Register src, int value);1222void imull(Register dst, Address src);12231224#ifdef _LP641225void imulq(Register dst, Register src);1226void imulq(Register dst, Register src, int value);1227void imulq(Register dst, Address src);1228#endif12291230// jcc is the generic conditional branch generator to run-1231// time routines, jcc is used for branches to labels. jcc1232// takes a branch opcode (cc) and a label (L) and generates1233// either a backward branch or a forward branch and links it1234// to the label fixup chain. Usage:1235//1236// Label L; // unbound label1237// jcc(cc, L); // forward branch to unbound label1238// bind(L); // bind label to the current pc1239// jcc(cc, L); // backward branch to bound label1240// bind(L); // illegal: a label may be bound only once1241//1242// Note: The same Label can be used for forward and backward branches1243// but it may be bound only once.12441245void jcc(Condition cc, Label& L, bool maybe_short = true);12461247// Conditional jump to a 8-bit offset to L.1248// WARNING: be very careful using this for forward jumps. If the label is1249// not bound within an 8-bit offset of this instruction, a run-time error1250// will occur.1251void jccb(Condition cc, Label& L);12521253void jmp(Address entry); // pc <- entry12541255// Label operations & relative jumps (PPUM Appendix D)1256void jmp(Label& L, bool maybe_short = true); // unconditional jump to L12571258void jmp(Register entry); // pc <- entry12591260// Unconditional 8-bit offset jump to L.1261// WARNING: be very careful using this for forward jumps. 
If the label is1262// not bound within an 8-bit offset of this instruction, a run-time error1263// will occur.1264void jmpb(Label& L);12651266void ldmxcsr( Address src );12671268void leal(Register dst, Address src);12691270void leaq(Register dst, Address src);12711272void lfence();12731274void lock();12751276void lzcntl(Register dst, Register src);12771278#ifdef _LP641279void lzcntq(Register dst, Register src);1280#endif12811282enum Membar_mask_bits {1283StoreStore = 1 << 3,1284LoadStore = 1 << 2,1285StoreLoad = 1 << 1,1286LoadLoad = 1 << 01287};12881289// Serializes memory and blows flags1290void membar(Membar_mask_bits order_constraint) {1291if (os::is_MP()) {1292// We only have to handle StoreLoad1293if (order_constraint & StoreLoad) {1294// All usable chips support "locked" instructions which suffice1295// as barriers, and are much faster than the alternative of1296// using cpuid instruction. We use here a locked add [esp],0.1297// This is conveniently otherwise a no-op except for blowing1298// flags.1299// Any change to this code may need to revisit other places in1300// the code where this idiom is used, in particular the1301// orderAccess code.1302lock();1303addl(Address(rsp, 0), 0);// Assert the lock# signal here1304}1305}1306}13071308void mfence();13091310// Moves13111312void mov64(Register dst, int64_t imm64);13131314void movb(Address dst, Register src);1315void movb(Address dst, int imm8);1316void movb(Register dst, Address src);13171318void movdl(XMMRegister dst, Register src);1319void movdl(Register dst, XMMRegister src);1320void movdl(XMMRegister dst, Address src);1321void movdl(Address dst, XMMRegister src);13221323// Move Double Quadword1324void movdq(XMMRegister dst, Register src);1325void movdq(Register dst, XMMRegister src);13261327// Move Aligned Double Quadword1328void movdqa(XMMRegister dst, XMMRegister src);1329void movdqa(XMMRegister dst, Address src);13301331// Move Unaligned Double Quadword1332void movdqu(Address dst, XMMRegister 
src);1333void movdqu(XMMRegister dst, Address src);1334void movdqu(XMMRegister dst, XMMRegister src);13351336// Move Unaligned 256bit Vector1337void vmovdqu(Address dst, XMMRegister src);1338void vmovdqu(XMMRegister dst, Address src);1339void vmovdqu(XMMRegister dst, XMMRegister src);13401341// Move lower 64bit to high 64bit in 128bit register1342void movlhps(XMMRegister dst, XMMRegister src);13431344void movl(Register dst, int32_t imm32);1345void movl(Address dst, int32_t imm32);1346void movl(Register dst, Register src);1347void movl(Register dst, Address src);1348void movl(Address dst, Register src);13491350// These dummies prevent using movl from converting a zero (like NULL) into Register1351// by giving the compiler two choices it can't resolve13521353void movl(Address dst, void* junk);1354void movl(Register dst, void* junk);13551356#ifdef _LP641357void movq(Register dst, Register src);1358void movq(Register dst, Address src);1359void movq(Address dst, Register src);1360#endif13611362void movq(Address dst, MMXRegister src );1363void movq(MMXRegister dst, Address src );13641365#ifdef _LP641366// These dummies prevent using movq from converting a zero (like NULL) into Register1367// by giving the compiler two choices it can't resolve13681369void movq(Address dst, void* dummy);1370void movq(Register dst, void* dummy);1371#endif13721373// Move Quadword1374void movq(Address dst, XMMRegister src);1375void movq(XMMRegister dst, Address src);13761377void movsbl(Register dst, Address src);1378void movsbl(Register dst, Register src);13791380#ifdef _LP641381void movsbq(Register dst, Address src);1382void movsbq(Register dst, Register src);13831384// Move signed 32bit immediate to 64bit extending sign1385void movslq(Address dst, int32_t imm64);1386void movslq(Register dst, int32_t imm64);13871388void movslq(Register dst, Address src);1389void movslq(Register dst, Register src);1390void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be 
ambiguous1391#endif13921393void movswl(Register dst, Address src);1394void movswl(Register dst, Register src);13951396#ifdef _LP641397void movswq(Register dst, Address src);1398void movswq(Register dst, Register src);1399#endif14001401void movw(Address dst, int imm16);1402void movw(Register dst, Address src);1403void movw(Address dst, Register src);14041405void movzbl(Register dst, Address src);1406void movzbl(Register dst, Register src);14071408#ifdef _LP641409void movzbq(Register dst, Address src);1410void movzbq(Register dst, Register src);1411#endif14121413void movzwl(Register dst, Address src);1414void movzwl(Register dst, Register src);14151416#ifdef _LP641417void movzwq(Register dst, Address src);1418void movzwq(Register dst, Register src);1419#endif14201421// Unsigned multiply with RAX destination register1422void mull(Address src);1423void mull(Register src);14241425#ifdef _LP641426void mulq(Address src);1427void mulq(Register src);1428void mulxq(Register dst1, Register dst2, Register src);1429#endif14301431// Multiply Scalar Double-Precision Floating-Point Values1432void mulsd(XMMRegister dst, Address src);1433void mulsd(XMMRegister dst, XMMRegister src);14341435// Multiply Scalar Single-Precision Floating-Point Values1436void mulss(XMMRegister dst, Address src);1437void mulss(XMMRegister dst, XMMRegister src);14381439void negl(Register dst);14401441#ifdef _LP641442void negq(Register dst);1443#endif14441445void nop(int i = 1);14461447void notl(Register dst);14481449#ifdef _LP641450void notq(Register dst);1451#endif14521453void orl(Address dst, int32_t imm32);1454void orl(Register dst, int32_t imm32);1455void orl(Register dst, Address src);1456void orl(Register dst, Register src);1457void orl(Address dst, Register src);14581459void orq(Address dst, int32_t imm32);1460void orq(Register dst, int32_t imm32);1461void orq(Register dst, Address src);1462void orq(Register dst, Register src);14631464// Pack with unsigned saturation1465void packuswb(XMMRegister 
dst, XMMRegister src);1466void packuswb(XMMRegister dst, Address src);1467void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);14681469// Pemutation of 64bit words1470void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);14711472void pause();14731474// SSE4.2 string instructions1475void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);1476void pcmpestri(XMMRegister xmm1, Address src, int imm8);14771478// SSE 4.1 extract1479void pextrd(Register dst, XMMRegister src, int imm8);1480void pextrq(Register dst, XMMRegister src, int imm8);14811482// SSE 4.1 insert1483void pinsrd(XMMRegister dst, Register src, int imm8);1484void pinsrq(XMMRegister dst, Register src, int imm8);14851486// SSE4.1 packed move1487void pmovzxbw(XMMRegister dst, XMMRegister src);1488void pmovzxbw(XMMRegister dst, Address src);14891490#ifndef _LP64 // no 32bit push/pop on amd641491void popl(Address dst);1492#endif14931494#ifdef _LP641495void popq(Address dst);1496#endif14971498void popcntl(Register dst, Address src);1499void popcntl(Register dst, Register src);15001501#ifdef _LP641502void popcntq(Register dst, Address src);1503void popcntq(Register dst, Register src);1504#endif15051506// Prefetches (SSE, SSE2, 3DNOW only)15071508void prefetchnta(Address src);1509void prefetchr(Address src);1510void prefetcht0(Address src);1511void prefetcht1(Address src);1512void prefetcht2(Address src);1513void prefetchw(Address src);15141515// Shuffle Bytes1516void pshufb(XMMRegister dst, XMMRegister src);1517void pshufb(XMMRegister dst, Address src);15181519// Shuffle Packed Doublewords1520void pshufd(XMMRegister dst, XMMRegister src, int mode);1521void pshufd(XMMRegister dst, Address src, int mode);15221523// Shuffle Packed Low Words1524void pshuflw(XMMRegister dst, XMMRegister src, int mode);1525void pshuflw(XMMRegister dst, Address src, int mode);15261527// Shift Right by bytes Logical DoubleQuadword Immediate1528void psrldq(XMMRegister dst, int 
shift);1529// Shift Left by bytes Logical DoubleQuadword Immediate1530void pslldq(XMMRegister dst, int shift);15311532// Logical Compare 128bit1533void ptest(XMMRegister dst, XMMRegister src);1534void ptest(XMMRegister dst, Address src);1535// Logical Compare 256bit1536void vptest(XMMRegister dst, XMMRegister src);1537void vptest(XMMRegister dst, Address src);15381539// Interleave Low Bytes1540void punpcklbw(XMMRegister dst, XMMRegister src);1541void punpcklbw(XMMRegister dst, Address src);15421543// Interleave Low Doublewords1544void punpckldq(XMMRegister dst, XMMRegister src);1545void punpckldq(XMMRegister dst, Address src);15461547// Interleave Low Quadwords1548void punpcklqdq(XMMRegister dst, XMMRegister src);15491550#ifndef _LP64 // no 32bit push/pop on amd641551void pushl(Address src);1552#endif15531554void pushq(Address src);15551556void rcll(Register dst, int imm8);15571558void rclq(Register dst, int imm8);15591560void rcrq(Register dst, int imm8);15611562void rdtsc();15631564void ret(int imm16);15651566#ifdef _LP641567void rorq(Register dst, int imm8);1568void rorxq(Register dst, Register src, int imm8);1569#endif15701571void sahf();15721573void sarl(Register dst, int imm8);1574void sarl(Register dst);15751576void sarq(Register dst, int imm8);1577void sarq(Register dst);15781579void sbbl(Address dst, int32_t imm32);1580void sbbl(Register dst, int32_t imm32);1581void sbbl(Register dst, Address src);1582void sbbl(Register dst, Register src);15831584void sbbq(Address dst, int32_t imm32);1585void sbbq(Register dst, int32_t imm32);1586void sbbq(Register dst, Address src);1587void sbbq(Register dst, Register src);15881589void setb(Condition cc, Register dst);15901591void shldl(Register dst, Register src);15921593void shll(Register dst, int imm8);1594void shll(Register dst);15951596void shlq(Register dst, int imm8);1597void shlq(Register dst);15981599void shrdl(Register dst, Register src);16001601void shrl(Register dst, int imm8);1602void shrl(Register 
dst);16031604void shrq(Register dst, int imm8);1605void shrq(Register dst);16061607void smovl(); // QQQ generic?16081609// Compute Square Root of Scalar Double-Precision Floating-Point Value1610void sqrtsd(XMMRegister dst, Address src);1611void sqrtsd(XMMRegister dst, XMMRegister src);16121613// Compute Square Root of Scalar Single-Precision Floating-Point Value1614void sqrtss(XMMRegister dst, Address src);1615void sqrtss(XMMRegister dst, XMMRegister src);16161617void std();16181619void stmxcsr( Address dst );16201621void subl(Address dst, int32_t imm32);1622void subl(Address dst, Register src);1623void subl(Register dst, int32_t imm32);1624void subl(Register dst, Address src);1625void subl(Register dst, Register src);16261627void subq(Address dst, int32_t imm32);1628void subq(Address dst, Register src);1629void subq(Register dst, int32_t imm32);1630void subq(Register dst, Address src);1631void subq(Register dst, Register src);16321633// Force generation of a 4 byte immediate value even if it fits into 8bit1634void subl_imm32(Register dst, int32_t imm32);1635void subq_imm32(Register dst, int32_t imm32);16361637// Subtract Scalar Double-Precision Floating-Point Values1638void subsd(XMMRegister dst, Address src);1639void subsd(XMMRegister dst, XMMRegister src);16401641// Subtract Scalar Single-Precision Floating-Point Values1642void subss(XMMRegister dst, Address src);1643void subss(XMMRegister dst, XMMRegister src);16441645void testb(Register dst, int imm8);1646void testb(Address dst, int imm8);16471648void testl(Register dst, int32_t imm32);1649void testl(Register dst, Register src);1650void testl(Register dst, Address src);16511652void testq(Register dst, int32_t imm32);1653void testq(Register dst, Register src);16541655// BMI - count trailing zeros1656void tzcntl(Register dst, Register src);1657void tzcntq(Register dst, Register src);16581659// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS1660void ucomisd(XMMRegister dst, Address 
src);1661void ucomisd(XMMRegister dst, XMMRegister src);16621663// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS1664void ucomiss(XMMRegister dst, Address src);1665void ucomiss(XMMRegister dst, XMMRegister src);16661667void xabort(int8_t imm8);16681669void xaddl(Address dst, Register src);16701671void xaddq(Address dst, Register src);16721673void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);16741675void xchgl(Register reg, Address adr);1676void xchgl(Register dst, Register src);16771678void xchgq(Register reg, Address adr);1679void xchgq(Register dst, Register src);16801681void xend();16821683// Get Value of Extended Control Register1684void xgetbv();16851686void xorl(Register dst, int32_t imm32);1687void xorl(Register dst, Address src);1688void xorl(Register dst, Register src);16891690void xorq(Register dst, Address src);1691void xorq(Register dst, Register src);16921693void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 016941695// AVX 3-operands scalar instructions (encoded with VEX prefix)16961697void vaddsd(XMMRegister dst, XMMRegister nds, Address src);1698void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);1699void vaddss(XMMRegister dst, XMMRegister nds, Address src);1700void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);1701void vdivsd(XMMRegister dst, XMMRegister nds, Address src);1702void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);1703void vdivss(XMMRegister dst, XMMRegister nds, Address src);1704void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);1705void vmulsd(XMMRegister dst, XMMRegister nds, Address src);1706void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);1707void vmulss(XMMRegister dst, XMMRegister nds, Address src);1708void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);1709void vsubsd(XMMRegister dst, XMMRegister nds, Address src);1710void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister 
src);1711void vsubss(XMMRegister dst, XMMRegister nds, Address src);1712void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);171317141715//====================VECTOR ARITHMETIC=====================================17161717// Add Packed Floating-Point Values1718void addpd(XMMRegister dst, XMMRegister src);1719void addps(XMMRegister dst, XMMRegister src);1720void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1721void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1722void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1723void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17241725// Subtract Packed Floating-Point Values1726void subpd(XMMRegister dst, XMMRegister src);1727void subps(XMMRegister dst, XMMRegister src);1728void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1729void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1730void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1731void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17321733// Multiply Packed Floating-Point Values1734void mulpd(XMMRegister dst, XMMRegister src);1735void mulps(XMMRegister dst, XMMRegister src);1736void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1737void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1738void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1739void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17401741// Divide Packed Floating-Point Values1742void divpd(XMMRegister dst, XMMRegister src);1743void divps(XMMRegister dst, XMMRegister src);1744void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1745void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1746void vdivpd(XMMRegister dst, XMMRegister nds, Address 
src, bool vector256);1747void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17481749// Bitwise Logical AND of Packed Floating-Point Values1750void andpd(XMMRegister dst, XMMRegister src);1751void andps(XMMRegister dst, XMMRegister src);1752void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1753void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1754void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1755void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17561757// Bitwise Logical XOR of Packed Floating-Point Values1758void xorpd(XMMRegister dst, XMMRegister src);1759void xorps(XMMRegister dst, XMMRegister src);1760void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1761void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1762void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1763void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17641765// Add packed integers1766void paddb(XMMRegister dst, XMMRegister src);1767void paddw(XMMRegister dst, XMMRegister src);1768void paddd(XMMRegister dst, XMMRegister src);1769void paddq(XMMRegister dst, XMMRegister src);1770void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1771void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1772void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1773void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1774void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1775void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1776void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1777void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17781779// Sub packed integers1780void psubb(XMMRegister 
dst, XMMRegister src);1781void psubw(XMMRegister dst, XMMRegister src);1782void psubd(XMMRegister dst, XMMRegister src);1783void psubq(XMMRegister dst, XMMRegister src);1784void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1785void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1786void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1787void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1788void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1789void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1790void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1791void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);17921793// Multiply packed integers (only shorts and ints)1794void pmullw(XMMRegister dst, XMMRegister src);1795void pmulld(XMMRegister dst, XMMRegister src);1796void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1797void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1798void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);1799void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);18001801// Shift left packed integers1802void psllw(XMMRegister dst, int shift);1803void pslld(XMMRegister dst, int shift);1804void psllq(XMMRegister dst, int shift);1805void psllw(XMMRegister dst, XMMRegister shift);1806void pslld(XMMRegister dst, XMMRegister shift);1807void psllq(XMMRegister dst, XMMRegister shift);1808void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);1809void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);1810void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);1811void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);1812void vpslld(XMMRegister dst, XMMRegister src, XMMRegister 
shift, bool vector256);1813void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);18141815// Logical shift right packed integers1816void psrlw(XMMRegister dst, int shift);1817void psrld(XMMRegister dst, int shift);1818void psrlq(XMMRegister dst, int shift);1819void psrlw(XMMRegister dst, XMMRegister shift);1820void psrld(XMMRegister dst, XMMRegister shift);1821void psrlq(XMMRegister dst, XMMRegister shift);1822void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);1823void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);1824void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);1825void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);1826void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);1827void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);18281829// Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)1830void psraw(XMMRegister dst, int shift);1831void psrad(XMMRegister dst, int shift);1832void psraw(XMMRegister dst, XMMRegister shift);1833void psrad(XMMRegister dst, XMMRegister shift);1834void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);1835void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);1836void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);1837void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);18381839// And packed integers1840void pand(XMMRegister dst, XMMRegister src);1841void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1842void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);18431844// Or packed integers1845void por(XMMRegister dst, XMMRegister src);1846void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1847void vpor(XMMRegister dst, XMMRegister nds, Address src, bool 
vector256);18481849// Xor packed integers1850void pxor(XMMRegister dst, XMMRegister src);1851void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);1852void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);18531854// Copy low 128bit into high 128bit of YMM registers.1855void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);1856void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);18571858// Load/store high 128bit of YMM registers which does not destroy other half.1859void vinsertf128h(XMMRegister dst, Address src);1860void vinserti128h(XMMRegister dst, Address src);1861void vextractf128h(Address dst, XMMRegister src);1862void vextracti128h(Address dst, XMMRegister src);18631864// duplicate 4-bytes integer data from src into 8 locations in dest1865void vpbroadcastd(XMMRegister dst, XMMRegister src);18661867// Carry-Less Multiplication Quadword1868void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);1869void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);18701871// AVX instruction which is used to clear upper 128 bits of YMM registers and1872// to avoid transaction penalty between AVX and SSE states. There is no1873// penalty if legacy SSE instructions are encoded using VEX prefix because1874// they always clear upper 128 bits. It should be used before calling1875// runtime code and native libraries.1876void vzeroupper();18771878protected:1879// Next instructions require address alignment 16 bytes SSE mode.1880// They should be called only from corresponding MacroAssembler instructions.1881void andpd(XMMRegister dst, Address src);1882void andps(XMMRegister dst, Address src);1883void xorpd(XMMRegister dst, Address src);1884void xorps(XMMRegister dst, Address src);18851886};18871888#endif // CPU_X86_VM_ASSEMBLER_X86_HPP188918901891