Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp
32285 views
/*1* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.2* Copyright 2012, 2014 SAP AG. All rights reserved.3* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.4*5* This code is free software; you can redistribute it and/or modify it6* under the terms of the GNU General Public License version 2 only, as7* published by the Free Software Foundation.8*9* This code is distributed in the hope that it will be useful, but WITHOUT10* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or11* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License12* version 2 for more details (a copy is included in the LICENSE file that13* accompanied this code).14*15* You should have received a copy of the GNU General Public License version16* 2 along with this work; if not, write to the Free Software Foundation,17* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.18*19* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA20* or visit www.oracle.com if you need additional information or have any21* questions.22*23*/2425#include "precompiled.hpp"26#include "asm/assembler.inline.hpp"27#include "gc_interface/collectedHeap.inline.hpp"28#include "interpreter/interpreter.hpp"29#include "memory/cardTableModRefBS.hpp"30#include "memory/resourceArea.hpp"31#include "prims/methodHandles.hpp"32#include "runtime/biasedLocking.hpp"33#include "runtime/interfaceSupport.hpp"34#include "runtime/objectMonitor.hpp"35#include "runtime/os.hpp"36#include "runtime/sharedRuntime.hpp"37#include "runtime/stubRoutines.hpp"38#include "utilities/macros.hpp"39#if INCLUDE_ALL_GCS40#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"41#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"42#include "gc_implementation/g1/heapRegion.hpp"43#endif // INCLUDE_ALL_GCS4445#ifdef PRODUCT46#define BLOCK_COMMENT(str) // nothing47#else48#define BLOCK_COMMENT(str) block_comment(str)49#endif5051int AbstractAssembler::code_fill_byte() {52return 0x00; // illegal instruction 0x0000000053}5455void Assembler::print_instruction(int inst) {56Unimplemented();57}5859// Patch instruction `inst' at offset `inst_pos' to refer to60// `dest_pos' and return the resulting instruction. We should have61// pcs, not offsets, but since all is relative, it will work out fine.62int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {63int m = 0; // mask for displacement field64int v = 0; // new value for displacement field6566switch (inv_op_ppc(inst)) {67case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;68case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;69default: ShouldNotReachHere();70}71return inst & ~m | v;72}7374// Return the offset, relative to _code_begin, of the destination of75// the branch inst at offset pos.76int Assembler::branch_destination(int inst, int pos) {77int r = 0;78switch (inv_op_ppc(inst)) {79case b_op: r = bxx_destination_offset(inst, pos); break;80case bc_op: r = inv_bd_field(inst, pos); break;81default: ShouldNotReachHere();82}83return r;84}8586// Low-level andi-one-instruction-macro.87void Assembler::andi(Register a, Register s, const int ui16) {88assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");89if (is_power_of_2_long(((jlong) ui16)+1)) {90// pow2minus191clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));92} else if (is_power_of_2_long((jlong) ui16)) {93// pow294rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));95} else if (is_power_of_2_long((jlong)-ui16)) {96// negpow297clrrdi(a, s, log2_long((jlong)-ui16));98} else {99andi_(a, s, ui16);100}101}102103// RegisterOrConstant version.104void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {105if (roc.is_constant()) {106if (s1 == noreg) {107int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);108Assembler::ld(d, simm16_rest, d);109} else if (is_simm(roc.as_constant(), 16)) {110Assembler::ld(d, roc.as_constant(), s1);111} else {112load_const_optimized(d, roc.as_constant());113Assembler::ldx(d, d, s1);114}115} else {116if (s1 == noreg)117Assembler::ld(d, 0, roc.as_register());118else119Assembler::ldx(d, roc.as_register(), s1);120}121}122123void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {124if (roc.is_constant()) {125if (s1 == noreg) {126int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);127Assembler::lwa(d, simm16_rest, d);128} else if (is_simm(roc.as_constant(), 16)) {129Assembler::lwa(d, roc.as_constant(), s1);130} else {131load_const_optimized(d, roc.as_constant());132Assembler::lwax(d, d, s1);133}134} else {135if (s1 == noreg)136Assembler::lwa(d, 0, roc.as_register());137else138Assembler::lwax(d, roc.as_register(), s1);139}140}141142void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {143if (roc.is_constant()) {144if (s1 == noreg) {145int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);146Assembler::lwz(d, simm16_rest, d);147} else if (is_simm(roc.as_constant(), 16)) {148Assembler::lwz(d, roc.as_constant(), s1);149} else {150load_const_optimized(d, roc.as_constant());151Assembler::lwzx(d, d, s1);152}153} else {154if (s1 == noreg)155Assembler::lwz(d, 0, roc.as_register());156else157Assembler::lwzx(d, roc.as_register(), s1);158}159}160161void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {162if (roc.is_constant()) {163if (s1 == noreg) {164int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);165Assembler::lha(d, simm16_rest, d);166} else if (is_simm(roc.as_constant(), 16)) {167Assembler::lha(d, roc.as_constant(), s1);168} else {169load_const_optimized(d, roc.as_constant());170Assembler::lhax(d, d, s1);171}172} else {173if (s1 == noreg)174Assembler::lha(d, 0, roc.as_register());175else176Assembler::lhax(d, roc.as_register(), s1);177}178}179180void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {181if (roc.is_constant()) {182if (s1 == noreg) {183int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);184Assembler::lhz(d, simm16_rest, d);185} else if (is_simm(roc.as_constant(), 16)) {186Assembler::lhz(d, roc.as_constant(), s1);187} else {188load_const_optimized(d, roc.as_constant());189Assembler::lhzx(d, d, s1);190}191} else {192if (s1 == noreg)193Assembler::lhz(d, 0, roc.as_register());194else195Assembler::lhzx(d, roc.as_register(), s1);196}197}198199void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {200if (roc.is_constant()) {201if (s1 == noreg) {202int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);203Assembler::lbz(d, simm16_rest, d);204} else if (is_simm(roc.as_constant(), 16)) {205Assembler::lbz(d, roc.as_constant(), s1);206} else {207load_const_optimized(d, roc.as_constant());208Assembler::lbzx(d, d, s1);209}210} else {211if (s1 == noreg)212Assembler::lbz(d, 0, roc.as_register());213else214Assembler::lbzx(d, roc.as_register(), s1);215}216}217218void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {219if (roc.is_constant()) {220if (s1 == noreg) {221guarantee(tmp != noreg, "Need tmp reg to encode large constants");222int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);223Assembler::std(d, simm16_rest, tmp);224} else if (is_simm(roc.as_constant(), 16)) {225Assembler::std(d, roc.as_constant(), s1);226} else {227guarantee(tmp != noreg, "Need tmp reg to encode large constants");228load_const_optimized(tmp, roc.as_constant());229Assembler::stdx(d, tmp, s1);230}231} else {232if (s1 == noreg)233Assembler::std(d, 0, roc.as_register());234else235Assembler::stdx(d, roc.as_register(), s1);236}237}238239void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {240if (roc.is_constant()) {241if (s1 == noreg) {242guarantee(tmp != noreg, "Need tmp reg to encode large constants");243int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);244Assembler::stw(d, simm16_rest, tmp);245} else if (is_simm(roc.as_constant(), 16)) {246Assembler::stw(d, roc.as_constant(), s1);247} else {248guarantee(tmp != noreg, "Need tmp reg to encode large constants");249load_const_optimized(tmp, roc.as_constant());250Assembler::stwx(d, tmp, s1);251}252} else {253if (s1 == noreg)254Assembler::stw(d, 0, roc.as_register());255else256Assembler::stwx(d, roc.as_register(), s1);257}258}259260void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {261if (roc.is_constant()) {262if (s1 == noreg) {263guarantee(tmp != noreg, "Need tmp reg to encode large constants");264int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);265Assembler::sth(d, simm16_rest, tmp);266} else if (is_simm(roc.as_constant(), 16)) {267Assembler::sth(d, roc.as_constant(), s1);268} else {269guarantee(tmp != noreg, "Need tmp reg to encode large constants");270load_const_optimized(tmp, roc.as_constant());271Assembler::sthx(d, tmp, s1);272}273} else {274if (s1 == noreg)275Assembler::sth(d, 0, roc.as_register());276else277Assembler::sthx(d, roc.as_register(), s1);278}279}280281void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {282if (roc.is_constant()) {283if (s1 == noreg) {284guarantee(tmp != noreg, "Need tmp reg to encode large constants");285int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);286Assembler::stb(d, simm16_rest, tmp);287} else if (is_simm(roc.as_constant(), 16)) {288Assembler::stb(d, roc.as_constant(), s1);289} else {290guarantee(tmp != noreg, "Need tmp reg to encode large constants");291load_const_optimized(tmp, roc.as_constant());292Assembler::stbx(d, tmp, s1);293}294} else {295if (s1 == noreg)296Assembler::stb(d, 0, roc.as_register());297else298Assembler::stbx(d, roc.as_register(), s1);299}300}301302void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {303if (roc.is_constant()) {304intptr_t c = roc.as_constant();305assert(is_simm(c, 16), "too big");306addi(d, s1, (int)c);307}308else add(d, roc.as_register(), s1);309}310311void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {312if (roc.is_constant()) {313intptr_t c = roc.as_constant();314assert(is_simm(-c, 16), "too big");315addi(d, s1, (int)-c);316}317else subf(d, roc.as_register(), s1);318}319320void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {321if (roc.is_constant()) {322intptr_t c = roc.as_constant();323assert(is_simm(c, 16), "too big");324cmpdi(d, s1, (int)c);325}326else cmpd(d, roc.as_register(), s1);327}328329// Load a 64 bit constant. Patchable.330void Assembler::load_const(Register d, long x, Register tmp) {331// 64-bit value: x = xa xb xc xd332int xa = (x >> 48) & 0xffff;333int xb = (x >> 32) & 0xffff;334int xc = (x >> 16) & 0xffff;335int xd = (x >> 0) & 0xffff;336if (tmp == noreg) {337Assembler::lis( d, (int)(short)xa);338Assembler::ori( d, d, (unsigned int)xb);339Assembler::sldi(d, d, 32);340Assembler::oris(d, d, (unsigned int)xc);341Assembler::ori( d, d, (unsigned int)xd);342} else {343// exploit instruction level parallelism if we have a tmp register344assert_different_registers(d, tmp);345Assembler::lis(tmp, (int)(short)xa);346Assembler::lis(d, (int)(short)xc);347Assembler::ori(tmp, tmp, (unsigned int)xb);348Assembler::ori(d, d, (unsigned int)xd);349Assembler::insrdi(d, tmp, 32, 0);350}351}352353// Load a 64 bit constant, optimized, not identifyable.354// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a355// 16 bit immediate offset.356int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {357// Avoid accidentally trying to use R0 for indexed addressing.358assert(d != R0, "R0 not allowed");359assert_different_registers(d, tmp);360361short xa, xb, xc, xd; // Four 16-bit chunks of const.362long rem = x; // Remaining part of const.363364xd = rem & 0xFFFF; // Lowest 16-bit chunk.365rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.366367if (rem == 0) { // opt 1: simm16368li(d, xd);369return 0;370}371372xc = rem & 0xFFFF; // Next 16-bit chunk.373rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.374375if (rem == 0) { // opt 2: simm32376lis(d, xc);377} else { // High 32 bits needed.378379if (tmp != noreg) { // opt 3: We have a temp reg.380// No carry propagation between xc and higher chunks here (use logical instructions).381xa = (x >> 48) & 0xffff;382xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.383bool load_xa = (xa != 0) || (xb < 0);384bool return_xd = false;385386if (load_xa) { lis(tmp, xa); }387if (xc) { lis(d, xc); }388if (load_xa) {389if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.390} else {391li(tmp, xb); // non-negative392}393if (xc) {394if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.395else if (xd) { addi(d, d, xd); }396} else {397li(d, xd);398}399insrdi(d, tmp, 32, 0);400return return_xd ? xd : 0; // non-negative401}402403xb = rem & 0xFFFF; // Next 16-bit chunk.404rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.405406xa = rem & 0xFFFF; // Highest 16-bit chunk.407408// opt 4: avoid adding 0409if (xa) { // Highest 16-bit needed?410lis(d, xa);411if (xb) { addi(d, d, xb); }412} else {413li(d, xb);414}415sldi(d, d, 32);416if (xc) { addis(d, d, xc); }417}418419// opt 5: Return offset to be inserted into following instruction.420if (return_simm16_rest) return xd;421422if (xd) { addi(d, d, xd); }423return 0;424}425426#ifndef PRODUCT427// Test of ppc assembler.428void Assembler::test_asm() {429// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions430addi( R0, R1, 10);431addis( R5, R2, 11);432addic_( R3, R31, 42);433subfic( R21, R12, 2112);434add( R3, R2, R1);435add_( R11, R22, R30);436subf( R7, R6, R5);437subf_( R8, R9, R4);438addc( R11, R12, R13);439addc_( R14, R14, R14);440subfc( R15, R16, R17);441subfc_( R18, R20, R19);442adde( R20, R22, R24);443adde_( R29, R27, R26);444subfe( R28, R1, R0);445subfe_( R21, R11, R29);446neg( R21, R22);447neg_( R13, R23);448mulli( R0, R11, -31);449mulld( R1, R18, R21);450mulld_( R2, R17, R22);451mullw( R3, R16, R23);452mullw_( R4, R15, R24);453divd( R5, R14, R25);454divd_( R6, R13, R26);455divw( R7, R12, R27);456divw_( R8, R11, R28);457458li( R3, -4711);459460// PPC 1, section 3.3.9, Fixed-Point Compare Instructions461cmpi( CCR7, 0, R27, 4711);462cmp( CCR0, 1, R14, R11);463cmpli( CCR5, 1, R17, 45);464cmpl( CCR3, 0, R9, R10);465466cmpwi( CCR7, R27, 4711);467cmpw( CCR0, R14, R11);468cmplwi( CCR5, R17, 45);469cmplw( CCR3, R9, R10);470471cmpdi( CCR7, R27, 4711);472cmpd( CCR0, R14, R11);473cmpldi( CCR5, R17, 45);474cmpld( CCR3, R9, R10);475476// PPC 1, section 3.3.11, Fixed-Point Logical Instructions477andi_( R4, R5, 0xff);478andis_( R12, R13, 0x7b51);479ori( R1, R4, 13);480oris( R3, R5, 177);481xori( R7, R6, 51);482xoris( R29, R0, 1);483andr( R17, R21, R16);484and_( R3, R5, R15);485orr( R2, R1, R9);486or_( R17, R15, R11);487xorr( R19, R18, R10);488xor_( R31, R21, R11);489nand( R5, R7, R3);490nand_( R3, R1, R0);491nor( R2, R3, R5);492nor_( R3, R6, R8);493andc( R25, R12, R11);494andc_( R24, R22, R21);495orc( R20, R10, R12);496orc_( R22, R2, R13);497498nop();499500// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions501sld( R5, R6, R8);502sld_( R3, R5, R9);503slw( R2, R1, R10);504slw_( R6, R26, R16);505srd( R16, R24, R8);506srd_( R21, R14, R7);507srw( R22, R25, R29);508srw_( R5, R18, R17);509srad( R7, R11, R0);510srad_( R9, R13, R1);511sraw( R7, R15, R2);512sraw_( R4, R17, R3);513sldi( R3, R18, 63);514sldi_( R2, R20, 30);515slwi( R1, R21, 30);516slwi_( R7, R23, 8);517srdi( R0, R19, 2);518srdi_( R12, R24, 5);519srwi( R13, R27, 6);520srwi_( R14, R29, 7);521sradi( R15, R30, 9);522sradi_( R16, R31, 19);523srawi( R17, R31, 15);524srawi_( R18, R31, 12);525526clrrdi( R3, R30, 5);527clrldi( R9, R10, 11);528529rldicr( R19, R20, 13, 15);530rldicr_(R20, R20, 16, 14);531rldicl( R21, R21, 30, 33);532rldicl_(R22, R1, 20, 25);533rlwinm( R23, R2, 25, 10, 11);534rlwinm_(R24, R3, 12, 13, 14);535536// PPC 1, section 3.3.2 Fixed-Point Load Instructions537lwzx( R3, R5, R7);538lwz( R11, 0, R1);539lwzu( R31, -4, R11);540541lwax( R3, R5, R7);542lwa( R31, -4, R11);543lhzx( R3, R5, R7);544lhz( R31, -4, R11);545lhzu( R31, -4, R11);546547548lhax( R3, R5, R7);549lha( R31, -4, R11);550lhau( R11, 0, R1);551552lbzx( R3, R5, R7);553lbz( R31, -4, R11);554lbzu( R11, 0, R1);555556ld( R31, -4, R11);557ldx( R3, R5, R7);558ldu( R31, -4, R11);559560// PPC 1, section 3.3.3 Fixed-Point Store Instructions561stwx( R3, R5, R7);562stw( R31, -4, R11);563stwu( R11, 0, R1);564565sthx( R3, R5, R7 );566sth( R31, -4, R11);567sthu( R31, -4, R11);568569stbx( R3, R5, R7);570stb( R31, -4, R11);571stbu( R31, -4, R11);572573std( R31, -4, R11);574stdx( R3, R5, R7);575stdu( R31, -4, R11);576577// PPC 1, section 3.3.13 Move To/From System Register Instructions578mtlr( R3);579mflr( R3);580mtctr( R3);581mfctr( R3);582mtcrf( 0xff, R15);583mtcr( R15);584mtcrf( 0x03, R15);585mtcr( R15);586mfcr( R15);587588// PPC 1, section 2.4.1 Branch Instructions589Label lbl1, lbl2, lbl3;590bind(lbl1);591592b(pc());593b(pc() - 8);594b(lbl1);595b(lbl2);596b(lbl3);597598bl(pc() - 8);599bl(lbl1);600bl(lbl2);601602bcl(4, 10, pc() - 8);603bcl(4, 10, lbl1);604bcl(4, 10, lbl2);605606bclr( 4, 6, 0);607bclrl(4, 6, 0);608609bind(lbl2);610611bcctr( 4, 6, 0);612bcctrl(4, 6, 0);613614blt(CCR0, lbl2);615bgt(CCR1, lbl2);616beq(CCR2, lbl2);617bso(CCR3, lbl2);618bge(CCR4, lbl2);619ble(CCR5, lbl2);620bne(CCR6, lbl2);621bns(CCR7, lbl2);622623bltl(CCR0, lbl2);624bgtl(CCR1, lbl2);625beql(CCR2, lbl2);626bsol(CCR3, lbl2);627bgel(CCR4, lbl2);628blel(CCR5, lbl2);629bnel(CCR6, lbl2);630bnsl(CCR7, lbl2);631blr();632633sync();634icbi( R1, R2);635dcbst(R2, R3);636637// FLOATING POINT instructions ppc.638// PPC 1, section 4.6.2 Floating-Point Load Instructions639lfs( F1, -11, R3);640lfsu(F2, 123, R4);641lfsx(F3, R5, R6);642lfd( F4, 456, R7);643lfdu(F5, 789, R8);644lfdx(F6, R10, R11);645646// PPC 1, section 4.6.3 Floating-Point Store Instructions647stfs( F7, 876, R12);648stfsu( F8, 543, R13);649stfsx( F9, R14, R15);650stfd( F10, 210, R16);651stfdu( F11, 111, R17);652stfdx( F12, R18, R19);653654// PPC 1, section 4.6.4 Floating-Point Move Instructions655fmr( F13, F14);656fmr_( F14, F15);657fneg( F16, F17);658fneg_( F18, F19);659fabs( F20, F21);660fabs_( F22, F23);661fnabs( F24, F25);662fnabs_(F26, F27);663664// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic665// Instructions666fadd( F28, F29, F30);667fadd_( F31, F0, F1);668fadds( F2, F3, F4);669fadds_(F5, F6, F7);670fsub( F8, F9, F10);671fsub_( F11, F12, F13);672fsubs( F14, F15, F16);673fsubs_(F17, F18, F19);674fmul( F20, F21, F22);675fmul_( F23, F24, F25);676fmuls( F26, F27, F28);677fmuls_(F29, F30, F31);678fdiv( F0, F1, F2);679fdiv_( F3, F4, F5);680fdivs( F6, F7, F8);681fdivs_(F9, F10, F11);682683// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion684// Instructions685frsp( F12, F13);686fctid( F14, F15);687fctidz(F16, F17);688fctiw( F18, F19);689fctiwz(F20, F21);690fcfid( F22, F23);691692// PPC 1, section 4.6.7 Floating-Point Compare Instructions693fcmpu( CCR7, F24, F25);694695tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", p2i(code()->insts_begin()), p2i(code()->insts_end()));696code()->decode();697}698699#endif // !PRODUCT700701702