Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/opto/lcm.cpp
32285 views
/*1* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "memory/allocation.inline.hpp"26#include "opto/block.hpp"27#include "opto/c2compiler.hpp"28#include "opto/callnode.hpp"29#include "opto/cfgnode.hpp"30#include "opto/machnode.hpp"31#include "opto/runtime.hpp"32#if defined AD_MD_HPP33# include AD_MD_HPP34#elif defined TARGET_ARCH_MODEL_x86_3235# include "adfiles/ad_x86_32.hpp"36#elif defined TARGET_ARCH_MODEL_x86_6437# include "adfiles/ad_x86_64.hpp"38#elif defined TARGET_ARCH_MODEL_aarch3239# include "adfiles/ad_aarch32.hpp"40#elif defined TARGET_ARCH_MODEL_aarch6441# include "adfiles/ad_aarch64.hpp"42#elif defined TARGET_ARCH_MODEL_sparc43# include "adfiles/ad_sparc.hpp"44#elif defined TARGET_ARCH_MODEL_zero45# include "adfiles/ad_zero.hpp"46#elif defined TARGET_ARCH_MODEL_ppc_6447# include "adfiles/ad_ppc_64.hpp"48#endif4950// Optimization - Graph Style5152// Check whether val is not-null-decoded compressed oop,53// i.e. 
// will grab into the base of the heap if it represents NULL.
static bool accesses_heap_base_zone(Node *val) {
  if (Universe::narrow_oop_base() != NULL) { // Implies UseCompressedOops.
    if (val && val->is_Mach()) {
      if (val->as_Mach()->ideal_Opcode() == Op_DecodeN) {
        // This assumes all Decodes with TypePtr::NotNull are matched to nodes that
        // decode NULL to point to the heap base (Decode_NN).
        if (val->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull) {
          return true;
        }
      }
      // Must recognize load operation with Decode matched in memory operand.
      // We should not reach here except for PPC/AIX, as os::zero_page_read_protected()
      // returns true everywhere else. On PPC, no such memory operands
      // exist, therefore we did not yet implement a check for such operands.
      NOT_AIX(Unimplemented());
    }
  }
  return false;
}

// Decide whether a read through 'val' still needs an explicit NULL check,
// even though reads through NULL normally trap via the protected zero page.
static bool needs_explicit_null_check_for_read(Node *val) {
  // On some OSes (AIX) the page at address 0 is only write protected.
  // If so, only Store operations will trap.
  if (os::zero_page_read_protected()) {
    return false; // Implicit null check will work.
  }
  // Also a read accessing the base of a heap-based compressed heap will trap.
  if (accesses_heap_base_zone(val) &&                    // Hits the base zone page.
      Universe::narrow_oop_use_implicit_null_checks()) { // Base zone page is protected.
    return false;
  }

  return true;
}

//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.
Use the memory op to do the NULL check.92// I can generate a memory op if there is not one nearby.93// The proj is the control projection for the not-null case.94// The val is the pointer being checked for nullness or95// decodeHeapOop_not_null node if it did not fold into address.96void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {97// Assume if null check need for 0 offset then always needed98// Intel solaris doesn't support any null checks yet and no99// mechanism exists (yet) to set the switches at an os_cpu level100if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;101102// Make sure the ptr-is-null path appears to be uncommon!103float f = block->end()->as_MachIf()->_prob;104if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;105if( f > PROB_UNLIKELY_MAG(4) ) return;106107uint bidx = 0; // Capture index of value into memop108bool was_store; // Memory op is a store op109110// Get the successor block for if the test ptr is non-null111Block* not_null_block; // this one goes with the proj112Block* null_block;113if (block->get_node(block->number_of_nodes()-1) == proj) {114null_block = block->_succs[0];115not_null_block = block->_succs[1];116} else {117assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");118not_null_block = block->_succs[0];119null_block = block->_succs[1];120}121while (null_block->is_Empty() == Block::empty_with_goto) {122null_block = null_block->_succs[0];123}124125// Search the exception block for an uncommon trap.126// (See Parse::do_if and Parse::do_ifnull for the reason127// we need an uncommon trap. 
Briefly, we need a way to128// detect failure of this optimization, as in 6366351.)129{130bool found_trap = false;131for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {132Node* nn = null_block->get_node(i1);133if (nn->is_MachCall() &&134nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {135const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();136if (trtype->isa_int() && trtype->is_int()->is_con()) {137jint tr_con = trtype->is_int()->get_con();138Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);139Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);140assert((int)reason < (int)BitsPerInt, "recode bit map");141if (is_set_nth_bit(allowed_reasons, (int) reason)142&& action != Deoptimization::Action_none) {143// This uncommon trap is sure to recompile, eventually.144// When that happens, C->too_many_traps will prevent145// this transformation from happening again.146found_trap = true;147}148}149break;150}151}152if (!found_trap) {153// We did not find an uncommon trap.154return;155}156}157158// Check for decodeHeapOop_not_null node which did not fold into address159bool is_decoden = ((intptr_t)val) & 1;160val = (Node*)(((intptr_t)val) & ~1);161162assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&163(val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");164165// Search the successor block for a load or store who's base value is also166// the tested value. 
There may be several.167Node_List *out = new Node_List(Thread::current()->resource_area());168MachNode *best = NULL; // Best found so far169for (DUIterator i = val->outs(); val->has_out(i); i++) {170Node *m = val->out(i);171if( !m->is_Mach() ) continue;172MachNode *mach = m->as_Mach();173was_store = false;174int iop = mach->ideal_Opcode();175switch( iop ) {176case Op_LoadB:177case Op_LoadUB:178case Op_LoadUS:179case Op_LoadD:180case Op_LoadF:181case Op_LoadI:182case Op_LoadL:183case Op_LoadP:184case Op_LoadN:185case Op_LoadS:186case Op_LoadKlass:187case Op_LoadNKlass:188case Op_LoadRange:189case Op_LoadD_unaligned:190case Op_LoadL_unaligned:191case Op_StoreB:192case Op_StoreC:193case Op_StoreCM:194case Op_StoreD:195case Op_StoreF:196case Op_StoreI:197case Op_StoreL:198case Op_StoreP:199case Op_StoreN:200case Op_StoreNKlass:201was_store = true; // Memory op is a store op202// Stores will have their address in slot 2 (memory in slot 1).203// If the value being nul-checked is in another slot, it means we204// are storing the checked value, which does NOT check the value!205if( mach->in(2) != val ) continue;206break; // Found a memory op?207case Op_StrComp:208case Op_StrEquals:209case Op_StrIndexOf:210case Op_AryEq:211case Op_EncodeISOArray:212// Not a legit memory op for implicit null check regardless of213// embedded loads214continue;215default: // Also check for embedded loads216if( !mach->needs_anti_dependence_check() )217continue; // Not an memory op; skip it218if( must_clone[iop] ) {219// Do not move nodes which produce flags because220// RA will try to clone it to place near branch and221// it will cause recompilation, see clone_node().222continue;223}224{225// Check that value is used in memory address in226// instructions with embedded load (CmpP val1,(val2+off)).227Node* base;228Node* index;229const MachOper* oper = mach->memory_inputs(base, index);230if (oper == NULL || oper == (MachOper*)-1) {231continue; // Not an memory op; skip it232}233if (val == base 
||234val == index && val->bottom_type()->isa_narrowoop()) {235break; // Found it236} else {237continue; // Skip it238}239}240break;241}242243// On some OSes (AIX) the page at address 0 is only write protected.244// If so, only Store operations will trap.245// But a read accessing the base of a heap-based compressed heap will trap.246if (!was_store && needs_explicit_null_check_for_read(val)) {247continue;248}249250// Check that node's control edge is not-null block's head or dominates it,251// otherwise we can't hoist it because there are other control dependencies.252Node* ctrl = mach->in(0);253if (ctrl != NULL && !(ctrl == not_null_block->head() ||254get_block_for_node(ctrl)->dominates(not_null_block))) {255continue;256}257258// check if the offset is not too high for implicit exception259{260intptr_t offset = 0;261const TypePtr *adr_type = NULL; // Do not need this return value here262const Node* base = mach->get_base_and_disp(offset, adr_type);263if (base == NULL || base == NodeSentinel) {264// Narrow oop address doesn't have base, only index265if( val->bottom_type()->isa_narrowoop() &&266MacroAssembler::needs_explicit_null_check(offset) )267continue; // Give up if offset is beyond page size268// cannot reason about it; is probably not implicit null exception269} else {270const TypePtr* tptr;271if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||272Universe::narrow_klass_shift() == 0)) {273// 32-bits narrow oop can be the base of address expressions274tptr = base->get_ptr_type();275} else {276// only regular oops are expected here277tptr = base->bottom_type()->is_ptr();278}279// Give up if offset is not a compile-time constant280if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )281continue;282offset += tptr->_offset; // correct if base is offseted283if( MacroAssembler::needs_explicit_null_check(offset) )284continue; // Give up is reference is beyond 4K page size285}286}287288// Check ctrl input to see if the null-check dominates the 
memory op289Block *cb = get_block_for_node(mach);290cb = cb->_idom; // Always hoist at least 1 block291if( !was_store ) { // Stores can be hoisted only one block292while( cb->_dom_depth > (block->_dom_depth + 1))293cb = cb->_idom; // Hoist loads as far as we want294// The non-null-block should dominate the memory op, too. Live295// range spilling will insert a spill in the non-null-block if it is296// needs to spill the memory op for an implicit null check.297if (cb->_dom_depth == (block->_dom_depth + 1)) {298if (cb != not_null_block) continue;299cb = cb->_idom;300}301}302if( cb != block ) continue;303304// Found a memory user; see if it can be hoisted to check-block305uint vidx = 0; // Capture index of value into memop306uint j;307for( j = mach->req()-1; j > 0; j-- ) {308if( mach->in(j) == val ) {309vidx = j;310// Ignore DecodeN val which could be hoisted to where needed.311if( is_decoden ) continue;312}313// Block of memory-op input314Block *inb = get_block_for_node(mach->in(j));315Block *b = block; // Start from nul check316while( b != inb && b->_dom_depth > inb->_dom_depth )317b = b->_idom; // search upwards for input318// See if input dominates null check319if( b != inb )320break;321}322if( j > 0 )323continue;324Block *mb = get_block_for_node(mach);325// Hoisting stores requires more checks for the anti-dependence case.326// Give up hoisting if we have to move the store past any load.327if( was_store ) {328Block *b = mb; // Start searching here for a local load329// mach use (faulting) trying to hoist330// n might be blocker to hoisting331while( b != block ) {332uint k;333for( k = 1; k < b->number_of_nodes(); k++ ) {334Node *n = b->get_node(k);335if( n->needs_anti_dependence_check() &&336n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )337break; // Found anti-dependent load338}339if( k < b->number_of_nodes() )340break; // Found anti-dependent load341// Make sure control does not do a merge (would have to check allpaths)342if( b->num_preds() != 2 ) 
break;343b = get_block_for_node(b->pred(1)); // Move up to predecessor block344}345if( b != block ) continue;346}347348// Make sure this memory op is not already being used for a NullCheck349Node *e = mb->end();350if( e->is_MachNullCheck() && e->in(1) == mach )351continue; // Already being used as a NULL check352353// Found a candidate! Pick one with least dom depth - the highest354// in the dom tree should be closest to the null check.355if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {356best = mach;357bidx = vidx;358}359}360// No candidate!361if (best == NULL) {362return;363}364365// ---- Found an implicit null check366extern int implicit_null_checks;367implicit_null_checks++;368369if( is_decoden ) {370// Check if we need to hoist decodeHeapOop_not_null first.371Block *valb = get_block_for_node(val);372if( block != valb && block->_dom_depth < valb->_dom_depth ) {373// Hoist it up to the end of the test block.374valb->find_remove(val);375block->add_inst(val);376map_node_to_block(val, block);377// DecodeN on x86 may kill flags. 
Check for flag-killing projections378// that also need to be hoisted.379for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {380Node* n = val->fast_out(j);381if( n->is_MachProj() ) {382get_block_for_node(n)->find_remove(n);383block->add_inst(n);384map_node_to_block(n, block);385}386}387}388}389// Hoist the memory candidate up to the end of the test block.390Block *old_block = get_block_for_node(best);391old_block->find_remove(best);392block->add_inst(best);393map_node_to_block(best, block);394395// Move the control dependence if it is pinned to not-null block.396// Don't change it in other cases: NULL or dominating control.397if (best->in(0) == not_null_block->head()) {398// Set it to control edge of null check.399best->set_req(0, proj->in(0)->in(0));400}401402// Check for flag-killing projections that also need to be hoisted403// Should be DU safe because no edge updates.404for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {405Node* n = best->fast_out(j);406if( n->is_MachProj() ) {407get_block_for_node(n)->find_remove(n);408block->add_inst(n);409map_node_to_block(n, block);410}411}412413// proj==Op_True --> ne test; proj==Op_False --> eq test.414// One of two graph shapes got matched:415// (IfTrue (If (Bool NE (CmpP ptr NULL))))416// (IfFalse (If (Bool EQ (CmpP ptr NULL))))417// NULL checks are always branch-if-eq. 
If we see a IfTrue projection418// then we are replacing a 'ne' test with a 'eq' NULL check test.419// We need to flip the projections to keep the same semantics.420if( proj->Opcode() == Op_IfTrue ) {421// Swap order of projections in basic block to swap branch targets422Node *tmp1 = block->get_node(block->end_idx()+1);423Node *tmp2 = block->get_node(block->end_idx()+2);424block->map_node(tmp2, block->end_idx()+1);425block->map_node(tmp1, block->end_idx()+2);426Node *tmp = new (C) Node(C->top()); // Use not NULL input427tmp1->replace_by(tmp);428tmp2->replace_by(tmp1);429tmp->replace_by(tmp2);430tmp->destruct();431}432433// Remove the existing null check; use a new implicit null check instead.434// Since schedule-local needs precise def-use info, we need to correct435// it as well.436Node *old_tst = proj->in(0);437MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);438block->map_node(nul_chk, block->end_idx());439map_node_to_block(nul_chk, block);440// Redirect users of old_test to nul_chk441for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)442old_tst->last_out(i2)->set_req(0, nul_chk);443// Clean-up any dead code444for (uint i3 = 0; i3 < old_tst->req(); i3++) {445Node* in = old_tst->in(i3);446old_tst->set_req(i3, NULL);447if (in->outcnt() == 0) {448// Remove dead input node449in->disconnect_inputs(NULL, C);450block->find_remove(in);451}452}453454latency_from_uses(nul_chk);455latency_from_uses(best);456457// insert anti-dependences to defs in this block458if (! best->needs_anti_dependence_check()) {459for (uint k = 1; k < block->number_of_nodes(); k++) {460Node *n = block->get_node(k);461if (n->needs_anti_dependence_check() &&462n->in(LoadNode::Memory) == best->in(StoreNode::Memory)) {463// Found anti-dependent load464insert_anti_dependences(block, n);465}466}467}468}469470471//------------------------------select-----------------------------------------472// Select a nice fellow from the worklist to schedule next. 
// If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next.  There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {

  // If only a single entry on the stack, use it
  uint cnt = worklist.size();
  if (cnt == 1) {
    Node *n = worklist[0];
    worklist.map(0,worklist.pop());
    return n;
  }

  uint choice  = 0; // Bigger is most important
  uint latency = 0; // Bigger is scheduled first
  uint score   = 0; // Bigger is better
  int idx = -1;     // Index in worklist
  int cand_cnt = 0; // Candidate count

  for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
    // Order in worklist is used to break ties.
    // See caller for how this is used to delay scheduling
    // of induction variable increments to after the other
    // uses of the phi are scheduled.
    Node *n = worklist[i];      // Get Node on worklist

    int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
    if( n->is_Proj() ||         // Projections always win
        n->Opcode()== Op_Con || // So does constant 'Top'
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
      worklist.map(i,worklist.pop());
      return n;
    }

    // Final call in a block must be adjacent to 'catch'
    Node *e = block->end();
    if( e->is_Catch() && e->in(0)->in(0) == n )
      continue;

    // Memory op for an implicit null check has to be at the end of the block
    if( e->is_MachNullCheck() && e->in(1) == n )
      continue;

    // Schedule IV increment last.
    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
        e->in(1)->in(1) == n && n->is_iteratively_computed())
      continue;

    uint n_choice  = 2;

    // See if this instruction is consumed by a branch. If so, then (as the
    // branch is the last instruction in the basic block) force it to the
    // end of the basic block
    if ( must_clone[iop] ) {
      // See if any use is a branch
      bool found_machif = false;

      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);

        // The use is a conditional branch, make them adjacent
        if (use->is_MachIf() && get_block_for_node(use) == block) {
          found_machif = true;
          break;
        }

        // More than this instruction pending for successor to be ready,
        // don't choose this if other opportunities are ready
        if (ready_cnt.at(use->_idx) > 1)
          n_choice = 1;
      }

      // loop terminated, prefer not to use this instruction
      if (found_machif)
        continue;
    }

    // See if this has a predecessor that is "must_clone", i.e. sets the
    // condition code.  If so, choose this first
    for (uint j = 0; j < n->req() ; j++) {
      Node *inn = n->in(j);
      if (inn) {
        if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
          n_choice = 3;
          break;
        }
      }
    }

    // MachTemps should be scheduled last so they are near their uses
    if (n->is_MachTemp()) {
      n_choice = 1;
    }

    uint n_latency = get_latency_for_node(n);
    uint n_score   = n->req();   // Many inputs get high score to break ties

    // Keep best latency found
    cand_cnt++;
    if (choice < n_choice ||
        (choice == n_choice &&
         ((StressLCM && Compile::randomized_select(cand_cnt)) ||
          (!StressLCM &&
           (latency < n_latency ||
            (latency == n_latency &&
             (score < n_score))))))) {
      choice  = n_choice;
      latency = n_latency;
      score   = n_score;
      idx     = i;               // Also keep index in worklist
    }
  } // End of for all ready nodes in worklist

  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner

  worklist.map((uint)idx, worklist.pop());     // Compress worklist
  return n;
}


//------------------------------set_next_call----------------------------------
// Recursively flag n and all of its same-block inputs in 'next_call';
// these nodes must be scheduled before the next call in this block.
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
  if( next_call.test_set(n->_idx) ) return;
  for( uint i=0; i<n->len(); i++ ) {
    Node *m = n->in(i);
    if( !m ) continue;  // must see all nodes in block that precede call
    if (get_block_for_node(m) == block) {
      set_next_call(block, m, next_call);
    }
  }
}

//------------------------------needed_for_next_call---------------------------
// Set the flag 'next_call' for each Node that is needed for the next call to
// be scheduled.  This flag lets me bias scheduling so Nodes needed for the
// next subroutine call get priority - basically it moves things NOT needed
// for the next call till after the call.
// This prevents me from trying to
// carry lots of stuff live across a call.
void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
  // Find the next control-defining Node in this block
  Node* call = NULL;
  for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
    Node* m = this_call->fast_out(i);
    if (get_block_for_node(m) == block && // Local-block user
        m != this_call &&                 // Not self-start node
        m->is_MachCall()) {
      call = m;
      break;
    }
  }
  if (call == NULL) return;    // No next call (e.g., block end is near)
  // Set next-call for all inputs to this call
  set_next_call(block, call, next_call);
}

//------------------------------add_call_kills-------------------------------------
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}


//------------------------------sched_call-------------------------------------
// Schedule a call (and its projections) at position node_cnt in 'block',
// update ready counts of users, and attach the fat kill-projection that
// models the registers the call clobbers.  Returns the next free slot index.
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);

  // Select the right register save policy.
  const char *save_policy = NULL;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}


//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.
  // Everything else gets topo-sorted.

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
    for (uint i = 0;i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++); // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        if (x != NULL && get_block_for_node(x) == block && n->find_prec_edge(x) != -1) {
          // Old edge to node within same block will get removed, but no precedence
          // edge will get added because it already exists. Update ready count.
          int cnt = ready_cnt.at(n->_idx);
          assert(cnt > 1, err_msg("MemBar node %d must not get ready here", n->_idx));
          ready_cnt.at_put(n->_idx, cnt-1);
        }
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = block->get_node(i3);       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);      // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    for (uint j=0; j< block->number_of_nodes(); j++) {
      Node *n = block->get_node(j);
      int idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
      tty->print("latency:%3d  ", get_latency_for_node(n));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++);    // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    } else {
      assert(false, "graph should be schedulable");
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < block->number_of_nodes();i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}

//--------------------------catch_cleanup_fix_all_inputs-----------------------
// Replace every (required or precedence) edge from 'use' to 'old_def'
// with an edge to 'new_def'.
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
  for (uint l = 0; l < use->len(); l++) {
    if (use->in(l) == old_def) {
      if (l <
use->req()) {
        // Normal (required) input slot: simply swap in the new definition.
        use->set_req(l, new_def);
      } else {
        // Precedence edge: remove the old one and add a new precedence edge
        // to new_def.  rm_prec() compacts the precedence-edge array by moving
        // the last edge into slot l, so back up and rescan this slot.
        use->rm_prec(l);
        use->add_prec(new_def);
        l--;
      }
    }
  }
}

//------------------------------catch_cleanup_find_cloned_def------------------
// Return the clone of 'def' that dominates 'use_blk'.  'def' lives in
// 'def_blk' and has been cloned into each of def_blk's successor blocks at
// node index 'n_clone_idx'.  If use_blk is not itself one of those
// successors, a PhiNode merging the clones is found or created in use_blk.
Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch.  Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use.  The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.  It needs a
    // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
    // Recursively locate (or build) the dominating clone along each
    // predecessor path; these values become the Phi's inputs.
    Node_Array inputs = new Node_List(Thread::current()->resource_area());
    for(uint k = 1; k < use_blk->num_preds(); k++) {
      Block* block = get_block_for_node(use_blk->pred(k));
      inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));
    }

    // Check to see if the use_blk already has an identical phi inserted.
    // If it exists, it will be at the first position since all uses of a
    // def are processed together.
    Node *phi = use_blk->get_node(1);
    if( phi->is_Phi() ) {
      fixup = phi;
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        if (phi->in(k) != inputs[k]) {
          // Not a match
          fixup = NULL;
          break;
        }
      }
    }

    // If an existing PhiNode was not found, make a new one.
    if (fixup == NULL) {
      Node *new_phi = PhiNode::make(use_blk->head(), def);
      // Insert at index 1 so a later identical request finds it above.
      use_blk->insert_node(new_phi, 1);
      map_node_to_block(new_phi, use_blk);
      for (uint k = 1; k < use_blk->num_preds(); k++) {
        new_phi->set_req(k, inputs[k]);
      }
      fixup = new_phi;
    }

  } else {
    // Found the use just below the Catch.  Make it use the clone.
    fixup = use_blk->get_node(n_clone_idx);
  }

  return fixup;
}

//--------------------------catch_cleanup_intra_block--------------------------
// Fix all input edges in use that reference "def".  The use is in the same
// block as the def and both have been cloned in each successor block.
static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {

  // Both the use and def have been cloned. For each successor block,
  // get the clone of the use, and make its input the clone of the def
  // found in that block.

  // Clones were inserted starting at node index 1 of each successor, in the
  // same relative order as in blk, so the clone of the node at index
  // 'use_idx' sits at 'use_idx - beg + 1' in every successor.
  uint use_idx = blk->find_node(use);
  uint offset_idx = use_idx - beg;
  for( uint k = 0; k < blk->_num_succs; k++ ) {
    // Get clone in each successor block
    Block *sb = blk->_succs[k];
    Node *clone = sb->get_node(offset_idx+1);
    assert( clone->Opcode() == use->Opcode(), "" );

    // Make use-clone reference the def-clone
    catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx));
  }
}

//------------------------------catch_cleanup_inter_block---------------------
// Fix all input edges in use that reference "def".  The use is in a different
// block than the def.
void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  if( !use_blk ) return;          // Can happen if the use is a precedence edge

  // Find (or build, via a Phi) the clone of def that dominates the use,
  // then rewrite every edge in 'use' that still points at the old def.
  Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);
  catch_cleanup_fix_all_inputs(use, def, new_def);
}

//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
void PhaseCFG::call_catch_cleanup(Block* block) {

  // End of region to clone
  uint end = block->end_idx();
  if( !block->get_node(end)->is_Catch() ) return;
  // Start of region to clone: walk backwards until we reach the MachProj
  // hanging off the MachCall that feeds this Catch.
  uint beg = end;
  while(!block->get_node(beg-1)->is_MachProj() ||
        !block->get_node(beg-1)->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < block->_num_succs; i++ ) {
    Block *sb = block->_succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    // Iterating from 'end' down to 'beg' while always inserting at index 1
    // preserves the original relative order of the clones in 'sb'.
    for( uint j = end; j > beg; j-- ) {
      Node *clone = block->get_node(j-1)->clone();
      sb->insert_node(clone, 1);
      map_node_to_block(clone, sb);
      if (clone->needs_anti_dependence_check()) {
        insert_anti_dependences(sb, clone);
      }
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = block->get_node(i2);        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = get_block_for_node(use);
      if( use->is_Phi() ) {
        // Phi input k corresponds to control predecessor k: resolve the
        // dominating clone along that specific predecessor path.
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Block* b = get_block_for_node(buse->pred(k));
            Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        if (block == buse) {
          catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops from the original block.  Removing the
  // node at 'beg' shifts the rest down, so we delete at 'beg' each time.
  for(uint i3 = beg; i3 < end; i3++ ) {
    block->get_node(beg)->disconnect_inputs(NULL, C);
    block->remove_node(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {
    Block *sb = block->_succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.  A dead Proj is only
    // removed when its defining node has exactly one output (this Proj).
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->get_node(j);
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL, C);
        sb->remove_node(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
      Node *cex = sb->get_node(1+new_cnt);
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->remove_node(1+new_cnt);
        sb->insert_node(cex, 1);
      }
    }
  }
}