Path: blob/jdk8u272-b10-aarch32-20201026/hotspot/src/share/vm/opto/lcm.cpp
83404 views
/*
 * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/machnode.hpp"
#include "opto/runtime.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch32
# include "adfiles/ad_aarch32.hpp"
#endif

// Optimization - Graph Style

// Check whether val is not-null-decoded compressed oop,
// i.e. will grab into the base of the heap if it represents NULL.
// Only relevant when compressed oops are in use with a non-NULL heap base.
static bool accesses_heap_base_zone(Node *val) {
  if (Universe::narrow_oop_base() != NULL) { // Implies UseCompressedOops.
    if (val && val->is_Mach()) {
      if (val->as_Mach()->ideal_Opcode() == Op_DecodeN) {
        // This assumes all Decodes with TypePtr::NotNull are matched to nodes that
        // decode NULL to point to the heap base (Decode_NN).
        if (val->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull) {
          return true;
        }
      }
      // Must recognize load operation with Decode matched in memory operand.
      // We should not reach here except for PPC/AIX, as os::zero_page_read_protected()
      // returns true everywhere else. On PPC, no such memory operands
      // exist, therefore we did not yet implement a check for such operands.
      NOT_AIX(Unimplemented());
    }
  }
  return false;
}

// Returns true when a *read* through 'val' cannot be relied upon to trap on
// NULL, so an explicit null check must be kept; returns false when the
// implicit (trap-based) check is sufficient.
static bool needs_explicit_null_check_for_read(Node *val) {
  // On some OSes (AIX) the page at address 0 is only write protected.
  // If so, only Store operations will trap.
  if (os::zero_page_read_protected()) {
    return false; // Implicit null check will work.
  }
  // Also a read accessing the base of a heap-based compressed heap will trap.
  if (accesses_heap_base_zone(val) &&                    // Hits the base zone page.
      Universe::narrow_oop_use_implicit_null_checks()) { // Base zone page is protected.
    return false;
  }

  return true;
}

//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities. Basically, find NULL checks
// with suitable memory ops nearby. Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// The proj is the control projection for the not-null case.
// The val is the pointer being checked for nullness or
// decodeHeapOop_not_null node if it did not fold into address.
// NOTE: the low bit of 'val' is used as a tag by the caller: when set, 'val'
// is a DecodeN that did not fold into the address (see is_decoden below).
void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
  // Assume if null check need for 0 offset then always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = block->end()->as_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > PROB_UNLIKELY_MAG(4) ) return;

  uint bidx = 0;            // Capture index of value into memop
  bool was_store;           // Memory op is a store op

  // Get the successor block for if the test ptr is non-null
  Block* not_null_block;    // this one goes with the proj
  Block* null_block;
  if (block->get_node(block->number_of_nodes()-1) == proj) {
    null_block     = block->_succs[0];
    not_null_block = block->_succs[1];
  } else {
    assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
    not_null_block = block->_succs[0];
    null_block     = block->_succs[1];
  }
  // Skip forwarding blocks so we look at the real null target.
  while (null_block->is_Empty() == Block::empty_with_goto) {
    null_block = null_block->_succs[0];
  }

  // Search the exception block for an uncommon trap.
  // (See Parse::do_if and Parse::do_ifnull for the reason
  // we need an uncommon trap. Briefly, we need a way to
  // detect failure of this optimization, as in 6366351.)
  {
    bool found_trap = false;
    for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
      Node* nn = null_block->get_node(i1);
      if (nn->is_MachCall() &&
          nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
        const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
        if (trtype->isa_int() && trtype->is_int()->is_con()) {
          jint tr_con = trtype->is_int()->get_con();
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
          Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
          assert((int)reason < (int)BitsPerInt, "recode bit map");
          if (is_set_nth_bit(allowed_reasons, (int) reason)
              && action != Deoptimization::Action_none) {
            // This uncommon trap is sure to recompile, eventually.
            // When that happens, C->too_many_traps will prevent
            // this transformation from happening again.
            found_trap = true;
          }
        }
        break;
      }
    }
    if (!found_trap) {
      // We did not find an uncommon trap.
      return;
    }
  }

  // Check for decodeHeapOop_not_null node which did not fold into address.
  // The caller tagged this condition into the low bit of the pointer.
  bool is_decoden = ((intptr_t)val) & 1;
  val = (Node*)(((intptr_t)val) & ~1);

  assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
         (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");

  // Search the successor block for a load or store whose base value is also
  // the tested value. There may be several.
  // NOTE(review): 'out' is never used below — dead allocation (resource
  // arena, so it is reclaimed with the arena and does not leak).
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    Node *m = val->out(i);
    if( !m->is_Mach() ) continue;
    MachNode *mach = m->as_Mach();
    was_store = false;
    int iop = mach->ideal_Opcode();
    switch( iop ) {
    case Op_LoadB:
    case Op_LoadUB:
    case Op_LoadUS:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadN:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadNKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      assert(mach->in(2) == val, "should be address");
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
    case Op_StoreN:
    case Op_StoreNKlass:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being nul-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
    case Op_EncodeISOArray:
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->needs_anti_dependence_check() )
        continue;               // Not an memory op; skip it
      if( must_clone[iop] ) {
        // Do not move nodes which produce flags because
        // RA will try to clone it to place near branch and
        // it will cause recompilation, see clone_node().
        continue;
      }
      {
        // Check that value is used in memory address in
        // instructions with embedded load (CmpP val1,(val2+off)).
        Node* base;
        Node* index;
        const MachOper* oper = mach->memory_inputs(base, index);
        if (oper == NULL || oper == (MachOper*)-1) {
          continue;             // Not an memory op; skip it
        }
        // Note precedence: reads as  val == base || (val == index && narrowoop).
        if (val == base ||
            val == index && val->bottom_type()->isa_narrowoop()) {
          break;                // Found it
        } else {
          continue;             // Skip it
        }
      }
      break;
    }

    // On some OSes (AIX) the page at address 0 is only write protected.
    // If so, only Store operations will trap.
    // But a read accessing the base of a heap-based compressed heap will trap.
    if (!was_store && needs_explicit_null_check_for_read(val)) {
      continue;
    }

    // Check that node's control edge is not-null block's head or dominates it,
    // otherwise we can't hoist it because there are other control dependencies.
    Node* ctrl = mach->in(0);
    if (ctrl != NULL && !(ctrl == not_null_block->head() ||
        get_block_for_node(ctrl)->dominates(not_null_block))) {
      continue;
    }

    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == NodeSentinel) {
        // Narrow oop address doesn't have base, only index
        if( val->bottom_type()->isa_narrowoop() &&
            MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if offset is beyond page size
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr;
        if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||
                                  Universe::narrow_klass_shift() == 0)) {
          // 32-bits narrow oop can be the base of address expressions
          tptr = base->get_ptr_type();
        } else {
          // only regular oops are expected here
          tptr = base->bottom_type()->is_ptr();
        }
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offseted
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = get_block_for_node(mach);
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
      while( cb->_dom_depth > (block->_dom_depth + 1))
        cb = cb->_idom;         // Hoist loads as far as we want
      // The non-null-block should dominate the memory op, too. Live
      // range spilling will insert a spill in the non-null-block if it is
      // needs to spill the memory op for an implicit null check.
      if (cb->_dom_depth == (block->_dom_depth + 1)) {
        if (cb != not_null_block) continue;
        cb = cb->_idom;
      }
    }
    if( cb != block ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) {
        vidx = j;
        // Ignore DecodeN val which could be hoisted to where needed.
        if( is_decoden ) continue;
      }
      // Block of memory-op input
      Block *inb = get_block_for_node(mach->in(j));
      Block *b = block;         // Start from nul check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 )
      continue;                 // Some input does not dominate the null check
    Block *mb = get_block_for_node(mach);
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
      while( b != block ) {
        uint k;
        for( k = 1; k < b->number_of_nodes(); k++ ) {
          Node *n = b->get_node(k);
          if( n->needs_anti_dependence_check() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
        if( k < b->number_of_nodes() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check allpaths)
        if( b->num_preds() != 2 ) break;
        b = get_block_for_node(b->pred(1)); // Move up to predecessor block
      }
      if( b != block ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    Node *e = mb->end();
    if( e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate! Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
      best = mach;
      bidx = vidx;
    }
  }
  // No candidate!
  if (best == NULL) {
    return;
  }

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  if( is_decoden ) {
    // Check if we need to hoist decodeHeapOop_not_null first.
    Block *valb = get_block_for_node(val);
    if( block != valb && block->_dom_depth < valb->_dom_depth ) {
      // Hoist it up to the end of the test block.
      valb->find_remove(val);
      block->add_inst(val);
      map_node_to_block(val, block);
      // DecodeN on x86 may kill flags. Check for flag-killing projections
      // that also need to be hoisted.
      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
        Node* n = val->fast_out(j);
        if( n->is_MachProj() ) {
          get_block_for_node(n)->find_remove(n);
          block->add_inst(n);
          map_node_to_block(n, block);
        }
      }
    }
  }
  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = get_block_for_node(best);
  old_block->find_remove(best);
  block->add_inst(best);
  map_node_to_block(best, block);

  // Move the control dependence if it is pinned to not-null block.
  // Don't change it in other cases: NULL or dominating control.
  if (best->in(0) == not_null_block->head()) {
    // Set it to control edge of null check.
    best->set_req(0, proj->in(0)->in(0));
  }

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->is_MachProj() ) {
      get_block_for_node(n)->find_remove(n);
      block->add_inst(n);
      map_node_to_block(n, block);
    }
  }

  // proj==Op_True --> ne test; proj==Op_False --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq. If we see a IfTrue projection
  // then we are replacing a 'ne' test with a 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = block->get_node(block->end_idx()+1);
    Node *tmp2 = block->get_node(block->end_idx()+2);
    block->map_node(tmp2, block->end_idx()+1);
    block->map_node(tmp1, block->end_idx()+2);
    Node *tmp = new (C) Node(C->top()); // Use not NULL input
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
    tmp->destruct();
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
  block->map_node(nul_chk, block->end_idx());
  map_node_to_block(nul_chk, block);
  // Redirect users of old_test to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++) {
    Node* in = old_tst->in(i3);
    old_tst->set_req(i3, NULL);
    if (in->outcnt() == 0) {
      // Remove dead input node
      in->disconnect_inputs(NULL, C);
      block->find_remove(in);
    }
  }

  latency_from_uses(nul_chk);
  latency_from_uses(best);

  // insert anti-dependences to defs in this block
  if (! best->needs_anti_dependence_check()) {
    for (uint k = 1; k < block->number_of_nodes(); k++) {
      Node *n = block->get_node(k);
      if (n->needs_anti_dependence_check() &&
          n->in(LoadNode::Memory) == best->in(StoreNode::Memory)) {
        // Found anti-dependent load
        insert_anti_dependences(block, n);
      }
    }
  }
}


//------------------------------select-----------------------------------------
// Select a nice fellow from the worklist to schedule next.
// If there is only
// one choice, then use it. Projections take top priority for correctness
// reasons - if I see a projection, then it is next. There are a number of
// other special cases, for instructions that consume condition codes, et al.
// These are chosen immediately. Some instructions are required to immediately
// precede the last instruction in the block, and these are taken last. Of the
// remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs.
// NOTE(review): the 'sched_slot' parameter is unused in this implementation.
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) {

  // If only a single entry on the stack, use it
  uint cnt = worklist.size();
  if (cnt == 1) {
    Node *n = worklist[0];
    worklist.map(0,worklist.pop());
    return n;
  }

  uint choice  = 0; // Bigger is most important
  uint latency = 0; // Bigger is scheduled first
  uint score   = 0; // Bigger is better
  int idx = -1;     // Index in worklist
  int cand_cnt = 0; // Candidate count

  for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
    // Order in worklist is used to break ties.
    // See caller for how this is used to delay scheduling
    // of induction variable increments to after the other
    // uses of the phi are scheduled.
    Node *n = worklist[i];      // Get Node on worklist

    int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
    if( n->is_Proj() ||         // Projections always win
        n->Opcode()== Op_Con || // So does constant 'Top'
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
      worklist.map(i,worklist.pop());
      return n;
    }

    // Final call in a block must be adjacent to 'catch'
    Node *e = block->end();
    if( e->is_Catch() && e->in(0)->in(0) == n )
      continue;

    // Memory op for an implicit null check has to be at the end of the block
    if( e->is_MachNullCheck() && e->in(1) == n )
      continue;

    // Schedule IV increment last.
    if (e->is_Mach() && e->as_Mach()->ideal_Opcode() == Op_CountedLoopEnd &&
        e->in(1)->in(1) == n && n->is_iteratively_computed())
      continue;

    uint n_choice = 2;

    // See if this instruction is consumed by a branch. If so, then (as the
    // branch is the last instruction in the basic block) force it to the
    // end of the basic block
    if ( must_clone[iop] ) {
      // See if any use is a branch
      bool found_machif = false;

      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);

        // The use is a conditional branch, make them adjacent
        if (use->is_MachIf() && get_block_for_node(use) == block) {
          found_machif = true;
          break;
        }

        // More than this instruction pending for successor to be ready,
        // don't choose this if other opportunities are ready
        if (ready_cnt.at(use->_idx) > 1)
          n_choice = 1;
      }

      // loop terminated, prefer not to use this instruction
      if (found_machif)
        continue;
    }

    // See if this has a predecessor that is "must_clone", i.e. sets the
    // condition code. If so, choose this first
    for (uint j = 0; j < n->req() ; j++) {
      Node *inn = n->in(j);
      if (inn) {
        if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
          n_choice = 3;
          break;
        }
      }
    }

    // MachTemps should be scheduled last so they are near their uses
    if (n->is_MachTemp()) {
      n_choice = 1;
    }

    uint n_latency = get_latency_for_node(n);
    uint n_score   = n->req();  // Many inputs get high score to break ties

    // Keep best latency found
    cand_cnt++;
    if (choice < n_choice ||
        (choice == n_choice &&
         ((StressLCM && Compile::randomized_select(cand_cnt)) ||
          (!StressLCM &&
           (latency < n_latency ||
            (latency == n_latency &&
             (score < n_score))))))) {
      choice  = n_choice;
      latency = n_latency;
      score   = n_score;
      idx     = i;              // Also keep index in worklist
    }
  } // End of for all ready nodes in worklist

  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner

  worklist.map((uint)idx, worklist.pop()); // Compress worklist
  return n;
}


//------------------------------set_next_call----------------------------------
// Transitively mark 'n' and all of its in-block inputs in 'next_call'.
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
  if( next_call.test_set(n->_idx) ) return;
  for( uint i=0; i<n->len(); i++ ) {
    Node *m = n->in(i);
    if( !m ) continue;  // must see all nodes in block that precede call
    if (get_block_for_node(m) == block) {
      set_next_call(block, m, next_call);
    }
  }
}

//------------------------------needed_for_next_call---------------------------
// Set the flag 'next_call' for each Node that is needed for the next call to
// be scheduled. This flag lets me bias scheduling so Nodes needed for the
// next subroutine call get priority - basically it moves things NOT needed
// for the next call till after the call. This prevents me from trying to
// carry lots of stuff live across a call.
void PhaseCFG::needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call) {
  // Find the next control-defining Node in this block
  Node* call = NULL;
  for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
    Node* m = this_call->fast_out(i);
    if (get_block_for_node(m) == block && // Local-block user
        m != this_call &&                 // Not self-start node
        m->is_MachCall()) {
      call = m;
      break;
    }
  }
  if (call == NULL) return;    // No next call (e.g., block end is near)
  // Set next-call for all inputs to this call
  set_next_call(block, call, next_call);
}

//------------------------------add_call_kills-------------------------------------
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}


//------------------------------sched_call-------------------------------------
// Schedule a call's projections next to it, create the fat-proj kill node for
// caller-saved registers, and return the updated node count.
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now. All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);

  // Select the right register save policy.
  const char *save_policy = NULL;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them. CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them. Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask. Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}


//------------------------------schedule_local---------------------------------
// Topological sort within a block. Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections. We leave
  // these alone. PhiNodes and ParmNodes are made to follow the block start
  // Node.
// Everything else gets topo-sorted.

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
    for (uint i = 0;i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++); // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        if (x != NULL && get_block_for_node(x) == block && n->find_prec_edge(x) != -1) {
          // Old edge to node within same block will get removed, but no precedence
          // edge will get added because it already exists. Update ready count.
          int cnt = ready_cnt.at(n->_idx);
          assert(cnt > 1, err_msg("MemBar node %d must not get ready here", n->_idx));
          ready_cnt.at_put(n->_idx, cnt-1);
        }
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
    Node *n = block->get_node(i3); // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {      // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);               // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    for (uint j=0; j< block->number_of_nodes(); j++) {
      Node *n = block->get_node(j);
      int idx = n->_idx;
      tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
      tty->print("latency:%3d ", get_latency_for_node(n));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Nodes created below (fat-proj kills) have indices >= max_idx and carry
  // no ready count; they are skipped in the ready-count updates.
  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];  // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++); // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("# ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i]; // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      // Non-call Mach node that nevertheless makes a call (e.g. a runtime
      // leaf stub): it still kills caller-saved registers.
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all. Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    } else {
      assert(false, "graph should be schedulable");
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < block->number_of_nodes();i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}

//--------------------------catch_cleanup_fix_all_inputs-----------------------
// Replace every occurrence of old_def among use's inputs (both required and
// precedence edges) with new_def. rm_prec compacts the precedence edges, so
// 'l' is decremented to re-examine the slot that was shifted into position l.
static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
  for (uint l = 0; l < use->len(); l++) {
    if (use->in(l) == old_def) {
      if (l < use->req()) {
        use->set_req(l, new_def);
      } else {
        use->rm_prec(l);
        use->add_prec(new_def);
        l--;
      }
    }
  }
}

//------------------------------catch_cleanup_find_cloned_def------------------
Node* PhaseCFG::catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {
  assert( use_blk != def_blk, "Inter-block cleanup only");

  // The use is some block below the Catch. Find and return the clone of the def
  // that dominates the use. If there is no clone in a dominating block, then
  // create a phi for the def in a dominating block.

  // Find which successor block dominates this use. The successor
  // blocks must all be single-entry (from the Catch only; I will have
  // split blocks to make this so), hence they all dominate.
  while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
    use_blk = use_blk->_idom;

  // Find the successor
  Node *fixup = NULL;

  uint j;
  for( j = 0; j < def_blk->_num_succs; j++ )
    if( use_blk == def_blk->_succs[j] )
      break;

  if( j == def_blk->_num_succs ) {
    // Block at same level in dom-tree is not a successor.
It needs a1029// PhiNode, the PhiNode uses from the def and IT's uses need fixup.1030Node_Array inputs = new Node_List(Thread::current()->resource_area());1031for(uint k = 1; k < use_blk->num_preds(); k++) {1032Block* block = get_block_for_node(use_blk->pred(k));1033inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, n_clone_idx));1034}10351036// Check to see if the use_blk already has an identical phi inserted.1037// If it exists, it will be at the first position since all uses of a1038// def are processed together.1039Node *phi = use_blk->get_node(1);1040if( phi->is_Phi() ) {1041fixup = phi;1042for (uint k = 1; k < use_blk->num_preds(); k++) {1043if (phi->in(k) != inputs[k]) {1044// Not a match1045fixup = NULL;1046break;1047}1048}1049}10501051// If an existing PhiNode was not found, make a new one.1052if (fixup == NULL) {1053Node *new_phi = PhiNode::make(use_blk->head(), def);1054use_blk->insert_node(new_phi, 1);1055map_node_to_block(new_phi, use_blk);1056for (uint k = 1; k < use_blk->num_preds(); k++) {1057new_phi->set_req(k, inputs[k]);1058}1059fixup = new_phi;1060}10611062} else {1063// Found the use just below the Catch. Make it use the clone.1064fixup = use_blk->get_node(n_clone_idx);1065}10661067return fixup;1068}10691070//--------------------------catch_cleanup_intra_block--------------------------1071// Fix all input edges in use that reference "def". The use is in the same1072// block as the def and both have been cloned in each successor block.1073static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {10741075// Both the use and def have been cloned. 
For each successor block,1076// get the clone of the use, and make its input the clone of the def1077// found in that block.10781079uint use_idx = blk->find_node(use);1080uint offset_idx = use_idx - beg;1081for( uint k = 0; k < blk->_num_succs; k++ ) {1082// Get clone in each successor block1083Block *sb = blk->_succs[k];1084Node *clone = sb->get_node(offset_idx+1);1085assert( clone->Opcode() == use->Opcode(), "" );10861087// Make use-clone reference the def-clone1088catch_cleanup_fix_all_inputs(clone, def, sb->get_node(n_clone_idx));1089}1090}10911092//------------------------------catch_cleanup_inter_block---------------------1093// Fix all input edges in use that reference "def". The use is in a different1094// block than the def.1095void PhaseCFG::catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx) {1096if( !use_blk ) return; // Can happen if the use is a precedence edge10971098Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, n_clone_idx);1099catch_cleanup_fix_all_inputs(use, def, new_def);1100}11011102//------------------------------call_catch_cleanup-----------------------------1103// If we inserted any instructions between a Call and his CatchNode,1104// clone the instructions on all paths below the Catch.1105void PhaseCFG::call_catch_cleanup(Block* block) {11061107// End of region to clone1108uint end = block->end_idx();1109if( !block->get_node(end)->is_Catch() ) return;1110// Start of region to clone1111uint beg = end;1112while(!block->get_node(beg-1)->is_MachProj() ||1113!block->get_node(beg-1)->in(0)->is_MachCall() ) {1114beg--;1115assert(beg > 0,"Catch cleanup walking beyond block boundary");1116}1117// Range of inserted instructions is [beg, end)1118if( beg == end ) return;11191120// Clone along all Catch output paths. 
Clone area between the 'beg' and1121// 'end' indices.1122for( uint i = 0; i < block->_num_succs; i++ ) {1123Block *sb = block->_succs[i];1124// Clone the entire area; ignoring the edge fixup for now.1125for( uint j = end; j > beg; j-- ) {1126Node *clone = block->get_node(j-1)->clone();1127sb->insert_node(clone, 1);1128map_node_to_block(clone, sb);1129if (clone->needs_anti_dependence_check()) {1130insert_anti_dependences(sb, clone);1131}1132}1133}113411351136// Fixup edges. Check the def-use info per cloned Node1137for(uint i2 = beg; i2 < end; i2++ ) {1138uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block1139Node *n = block->get_node(i2); // Node that got cloned1140// Need DU safe iterator because of edge manipulation in calls.1141Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());1142for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {1143out->push(n->fast_out(j1));1144}1145uint max = out->size();1146for (uint j = 0; j < max; j++) {// For all users1147Node *use = out->pop();1148Block *buse = get_block_for_node(use);1149if( use->is_Phi() ) {1150for( uint k = 1; k < use->req(); k++ )1151if( use->in(k) == n ) {1152Block* b = get_block_for_node(buse->pred(k));1153Node *fixup = catch_cleanup_find_cloned_def(b, n, block, n_clone_idx);1154use->set_req(k, fixup);1155}1156} else {1157if (block == buse) {1158catch_cleanup_intra_block(use, n, block, beg, n_clone_idx);1159} else {1160catch_cleanup_inter_block(use, buse, n, block, n_clone_idx);1161}1162}1163} // End for all users11641165} // End of for all Nodes in cloned area11661167// Remove the now-dead cloned ops1168for(uint i3 = beg; i3 < end; i3++ ) {1169block->get_node(beg)->disconnect_inputs(NULL, C);1170block->remove_node(beg);1171}11721173// If the successor blocks have a CreateEx node, move it back to the top1174for(uint i4 = 0; i4 < block->_num_succs; i4++ ) {1175Block *sb = block->_succs[i4];1176uint new_cnt = end - beg;1177// Remove any 
newly created, but dead, nodes.1178for( uint j = new_cnt; j > 0; j-- ) {1179Node *n = sb->get_node(j);1180if (n->outcnt() == 0 &&1181(!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){1182n->disconnect_inputs(NULL, C);1183sb->remove_node(j);1184new_cnt--;1185}1186}1187// If any newly created nodes remain, move the CreateEx node to the top1188if (new_cnt > 0) {1189Node *cex = sb->get_node(1+new_cnt);1190if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {1191sb->remove_node(1+new_cnt);1192sb->insert_node(cex, 1);1193}1194}1195}1196}119711981199