Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/opto/block.cpp
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/block.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/opcodes.hpp"
#include "opto/rootnode.hpp"
#include "utilities/copy.hpp"

void Block_Array::grow( uint i ) {
  assert(i >= Max(), "must be an overflow");
  debug_only(_limit = i+1);
  if( i < _size ) return;
  if( !_size ) {
    _size = 1;
    _blocks = (Block**)_arena->Amalloc( _size * sizeof(Block*) );
    _blocks[0] = NULL;
  }
  uint old = _size;
  while( i >= _size ) _size <<= 1; // Double to fit
  _blocks = (Block**)_arena->Arealloc( _blocks, old*sizeof(Block*), _size*sizeof(Block*));
  Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
}

void Block_List::remove(uint i) {
  assert(i < _cnt, "index out of bounds");
  Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
  pop(); // shrink list by one block
}

void Block_List::insert(uint i, Block *b) {
  push(b); // grow list by one block
  Copy::conjoint_words_to_higher((HeapWord*)&_blocks[i], (HeapWord*)&_blocks[i+1], ((_cnt-i-1)*sizeof(Block*)));
  _blocks[i] = b;
}

#ifndef PRODUCT
void Block_List::print() {
  for (uint i=0; i < size(); i++) {
    tty->print("B%d ", _blocks[i]->_pre_order);
  }
  tty->print("size = %d\n", size());
}
#endif

uint Block::code_alignment() {
  // Check for Root block
  if (_pre_order == 0) return CodeEntryAlignment;
  // Check for Start block
  if (_pre_order == 1) return InteriorEntryAlignment;
  // Check for loop alignment
  if (has_loop_alignment()) return loop_alignment();

  return relocInfo::addr_unit(); // no particular alignment
}

uint Block::compute_loop_alignment() {
  Node *h = head();
  int unit_sz = relocInfo::addr_unit();
  if (h->is_Loop() && h->as_Loop()->is_inner_loop()) {
    // Pre- and post-loops have low trip count so do not bother with
    // NOPs for align loop head. The constants are hidden from tuning
    // but only because my "divide by 4" heuristic surely gets nearly
    // all possible gain (a "do not align at all" heuristic has a
    // chance of getting a really tiny gain).
    if (h->is_CountedLoop() && (h->as_CountedLoop()->is_pre_loop() ||
                                h->as_CountedLoop()->is_post_loop())) {
      return (OptoLoopAlignment > 4*unit_sz) ? (OptoLoopAlignment>>2) : unit_sz;
    }
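    // (Illustrative arithmetic only: with OptoLoopAlignment == 16 and a
    // 1-byte relocInfo::addr_unit(), a pre- or post-loop head is padded to
    // a 16 >> 2 == 4 byte boundary; with OptoLoopAlignment <= 4 it gets no
    // extra alignment at all.)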
    // Loops with low backedge frequency should not be aligned.
    Node *n = h->in(LoopNode::LoopBackControl)->in(0);
    if (n->is_MachIf() && n->as_MachIf()->_prob < 0.01) {
      return unit_sz; // Loop does not loop, more often than not!
    }
    return OptoLoopAlignment; // Otherwise align loop head
  }

  return unit_sz; // no particular alignment
}

// Compute the size of first 'inst_cnt' instructions in this block.
// Return the number of instructions left to compute if the block has
// less than 'inst_cnt' instructions. Stop, and return 0 if sum_size
// exceeds OptoLoopAlignment.
uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
                                    PhaseRegAlloc* ra) {
  uint last_inst = number_of_nodes();
  for( uint j = 0; j < last_inst && inst_cnt > 0; j++ ) {
    uint inst_size = get_node(j)->size(ra);
    if( inst_size > 0 ) {
      inst_cnt--;
      uint sz = sum_size + inst_size;
      if( sz <= (uint)OptoLoopAlignment ) {
        // Compute size of instructions which fit into fetch buffer only
        // since all inst_cnt instructions will not fit even if we align them.
        sum_size = sz;
      } else {
        return 0;
      }
    }
  }
  return inst_cnt;
}

uint Block::find_node( const Node *n ) const {
  for( uint i = 0; i < number_of_nodes(); i++ ) {
    if( get_node(i) == n )
      return i;
  }
  ShouldNotReachHere();
  return 0;
}

// Find and remove n from block list
void Block::find_remove( const Node *n ) {
  remove_node(find_node(n));
}

bool Block::contains(const Node *n) const {
  return _nodes.contains(n);
}

// Return empty status of a block. Empty blocks contain only the head, other
// ideal nodes, and an optional trailing goto.
int Block::is_Empty() const {

  // Root or start block is not considered empty
  if (head()->is_Root() || head()->is_Start()) {
    return not_empty;
  }

  int success_result = completely_empty;
  int end_idx = number_of_nodes() - 1;

  // Check for ending goto
  if ((end_idx > 0) && (get_node(end_idx)->is_MachGoto())) {
    success_result = empty_with_goto;
    end_idx--;
  }

  // Unreachable blocks are considered empty
  if (num_preds() <= 1) {
    return success_result;
  }

  // Ideal nodes are allowable in empty blocks: skip them. Only MachNodes
  // turn directly into code, because only MachNodes have non-trivial
  // emit() functions.
  while ((end_idx > 0) && !get_node(end_idx)->is_Mach()) {
    end_idx--;
  }

  // No room for any interesting instructions?
  if (end_idx == 0) {
    return success_result;
  }

  return not_empty;
}

// Return true if the block's code implies that it is likely to be
// executed infrequently. Check to see if the block ends in a Halt or
// a low probability call.
bool Block::has_uncommon_code() const {
  Node* en = end();

  if (en->is_MachGoto())
    en = en->in(0);
  if (en->is_Catch())
    en = en->in(0);
  if (en->is_MachProj() && en->in(0)->is_MachCall()) {
    MachCallNode* call = en->in(0)->as_MachCall();
    if (call->cnt() != COUNT_UNKNOWN && call->cnt() <= PROB_UNLIKELY_MAG(4)) {
      // This is true for slow-path stubs like new_{instance,array},
      // slow_arraycopy, complete_monitor_locking, uncommon_trap.
      // The magic number corresponds to the probability of an uncommon_trap,
      // even though it is a count not a probability.
      return true;
    }
  }

  int op = en->is_Mach() ? en->as_Mach()->ideal_Opcode() : en->Opcode();
  return op == Op_Halt;
}
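// (For scale: PROB_UNLIKELY_MAG(4) is the magnitude-4 "unlikely" probability
// constant declared in cfgnode.hpp -- on the order of 1 in 10,000 -- the same
// threshold PhaseCFG::is_uncommon() below applies to guard tests.)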
// True if block is low enough frequency or guarded by a test which
// mostly does not go here.
bool PhaseCFG::is_uncommon(const Block* block) {
  // Initial blocks must never be moved, so are never uncommon.
  if (block->head()->is_Root() || block->head()->is_Start()) return false;

  // Check for way-low freq
  if( block->_freq < BLOCK_FREQUENCY(0.00001f) ) return true;

  // Look for code shape indicating uncommon_trap or slow path
  if (block->has_uncommon_code()) return true;

  const float epsilon = 0.05f;
  const float guard_factor = PROB_UNLIKELY_MAG(4) / (1.f - epsilon);
  uint uncommon_preds = 0;
  uint freq_preds = 0;
  uint uncommon_for_freq_preds = 0;

  for( uint i=1; i< block->num_preds(); i++ ) {
    Block* guard = get_block_for_node(block->pred(i));
    // Check to see if this block follows its guard 1 time out of 10000
    // or less.
    //
    // See list of magnitude-4 unlikely probabilities in cfgnode.hpp which
    // we intend to be "uncommon", such as slow-path TLE allocation,
    // predicted call failure, and uncommon trap triggers.
    //
    // Use an epsilon value of 5% to allow for variability in frequency
    // predictions and floating point calculations. The net effect is
    // that guard_factor is set to 9500.
    //
    // Ignore low-frequency blocks.
    // The next check is (guard->_freq < 1.e-5 * 9500.).
    if(guard->_freq*BLOCK_FREQUENCY(guard_factor) < BLOCK_FREQUENCY(0.00001f)) {
      uncommon_preds++;
    } else {
      freq_preds++;
      if( block->_freq < guard->_freq * guard_factor ) {
        uncommon_for_freq_preds++;
      }
    }
  }
  if( block->num_preds() > 1 &&
      // The block is uncommon if all preds are uncommon or
      (uncommon_preds == (block->num_preds()-1) ||
      // it is uncommon for all frequent preds.
       uncommon_for_freq_preds == freq_preds) ) {
    return true;
  }
  return false;
}
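// (Illustration: a block with one frequent and two near-zero-frequency
// predecessors fails the "all preds are uncommon" test, so it is reported
// uncommon only if its own frequency is also below guard->_freq *
// guard_factor for that one frequent predecessor.)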
#ifndef PRODUCT
void Block::dump_bidx(const Block* orig, outputStream* st) const {
  if (_pre_order) st->print("B%d",_pre_order);
  else st->print("N%d", head()->_idx);

  if (Verbose && orig != this) {
    // Dump the original block's idx
    st->print(" (");
    orig->dump_bidx(orig, st);
    st->print(")");
  }
}

void Block::dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st) const {
  if (is_connector()) {
    for (uint i=1; i<num_preds(); i++) {
      Block *p = cfg->get_block_for_node(pred(i));
      p->dump_pred(cfg, orig, st);
    }
  } else {
    dump_bidx(orig, st);
    st->print(" ");
  }
}

void Block::dump_head(const PhaseCFG* cfg, outputStream* st) const {
  // Print the basic block
  dump_bidx(this, st);
  st->print(": #\t");

  // Print the incoming CFG edges and the outgoing CFG edges
  for( uint i=0; i<_num_succs; i++ ) {
    non_connector_successor(i)->dump_bidx(_succs[i], st);
    st->print(" ");
  }
  st->print("<- ");
  if( head()->is_block_start() ) {
    for (uint i=1; i<num_preds(); i++) {
      Node *s = pred(i);
      if (cfg != NULL) {
        Block *p = cfg->get_block_for_node(s);
        p->dump_pred(cfg, p, st);
      } else {
        while (!s->is_block_start())
          s = s->in(0);
        st->print("N%d ", s->_idx );
      }
    }
  } else {
    st->print("BLOCK HEAD IS JUNK ");
  }

  // Print loop, if any
  const Block *bhead = this; // Head of self-loop
  Node *bh = bhead->head();

  if ((cfg != NULL) && bh->is_Loop() && !head()->is_Root()) {
    LoopNode *loop = bh->as_Loop();
    const Block *bx = cfg->get_block_for_node(loop->in(LoopNode::LoopBackControl));
    while (bx->is_connector()) {
      bx = cfg->get_block_for_node(bx->pred(1));
    }
    st->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
    // Dump any loop-specific bits, especially for CountedLoops.
    loop->dump_spec(st);
  } else if (has_loop_alignment()) {
    st->print(" top-of-loop");
  }
  st->print(" Freq: %g",_freq);
  if( Verbose || WizardMode ) {
    st->print(" IDom: %d/#%d", _idom ? _idom->_pre_order : 0, _dom_depth);
    st->print(" RegPressure: %d",_reg_pressure);
    st->print(" IHRP Index: %d",_ihrp_index);
    st->print(" FRegPressure: %d",_freg_pressure);
    st->print(" FHRP Index: %d",_fhrp_index);
  }
  st->cr();
}

void Block::dump() const {
  dump(NULL);
}

void Block::dump(const PhaseCFG* cfg) const {
  dump_head(cfg);
  for (uint i=0; i< number_of_nodes(); i++) {
    get_node(i)->dump();
  }
  tty->print("\n");
}
#endif

PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
: Phase(CFG)
, _block_arena(arena)
, _root(root)
, _matcher(matcher)
, _node_to_block_mapping(arena)
, _node_latency(NULL)
#ifndef PRODUCT
, _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
#endif
#ifdef ASSERT
, _raw_oops(arena)
#endif
{
  ResourceMark rm;
  // I'll need a few machine-specific GotoNodes. Make an Ideal GotoNode,
  // then Match it into a machine-specific Node. Then clone the machine
  // Node on demand.
  Node *x = new (C) GotoNode(NULL);
  x->init_req(0, x);
  _goto = matcher.match_tree(x);
  assert(_goto != NULL, "");
  _goto->set_req(0,_goto);

  // Build the CFG in Reverse Post Order
  _number_of_blocks = build_cfg();
  _root_block = get_block_for_node(_root);
}
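// (Note on the prototype above: _goto points its control input at itself
// only so the node stays well-formed; every clone handed out later -- in
// build_cfg(), insert_goto_at() and convert_NeverBranch_to_Goto() -- is
// rewired to its real control with set_req(0, ...).)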
// Build a proper looking CFG. Make every block begin with either a StartNode
// or a RegionNode. Make every block end with either a Goto, If or Return.
// The RootNode both starts and ends its own block. Do this with a recursive
// backwards walk over the control edges.
uint PhaseCFG::build_cfg() {
  Arena *a = Thread::current()->resource_area();
  VectorSet visited(a);

  // Allocate stack with enough space to avoid frequent realloc
  Node_Stack nstack(a, C->live_nodes() >> 1);
  nstack.push(_root, 0);
  uint sum = 0;                 // Counter for blocks

  while (nstack.is_nonempty()) {
    // node and in's index from stack's top
    // 'np' is _root (see above) or RegionNode, StartNode: we push on stack
    // only nodes which point to the start of basic block (see below).
    Node *np = nstack.node();
    // idx > 0, except for the first node (_root) pushed on stack
    // at the beginning when idx == 0.
    // We will use the condition (idx == 0) later to end the build.
    uint idx = nstack.index();
    Node *proj = np->in(idx);
    const Node *x = proj->is_block_proj();
    // Does the block end with a proper block-ending Node? One of Return,
    // If or Goto? (This check should be done for visited nodes also).
    if (x == NULL) {                // Does not end right...
      Node *g = _goto->clone();     // Force it to end in a Goto
      g->set_req(0, proj);
      np->set_req(idx, g);
      x = proj = g;
    }
    if (!visited.test_set(x->_idx)) { // Visit this block once
      // Skip any control-pinned middle'in stuff
      Node *p = proj;
      do {
        proj = p;                   // Update pointer to last Control
        p = p->in(0);               // Move control forward
      } while( !p->is_block_proj() &&
               !p->is_block_start() );
      // Make the block begin with one of Region or StartNode.
      if( !p->is_block_start() ) {
        RegionNode *r = new (C) RegionNode( 2 );
        r->init_req(1, p);          // Insert RegionNode in the way
        proj->set_req(0, r);        // Insert RegionNode in the way
        p = r;
      }
      // 'p' now points to the start of this basic block

      // Put self in array of basic blocks
      Block *bb = new (_block_arena) Block(_block_arena, p);
      map_node_to_block(p, bb);
      map_node_to_block(x, bb);
      if( x != p ) {                // Only for root is x == p
        bb->push_node((Node*)x);
      }
      // Now handle predecessors
      ++sum;                        // Count 1 for self block
      uint cnt = bb->num_preds();
      for (int i = (cnt - 1); i > 0; i--) { // For all predecessors
        Node *prevproj = p->in(i);  // Get prior input
        assert( !prevproj->is_Con(), "dead input not removed" );
        // Check to see if p->in(i) is a "control-dependent" CFG edge -
        // i.e., it splits at the source (via an IF or SWITCH) and merges
        // at the destination (via a many-input Region).
        // This breaks critical edges. The RegionNode to start the block
        // will be added when <p,i> is pulled off the node stack
        if ( cnt > 2 ) {            // Merging many things?
          assert( prevproj== bb->pred(i),"");
          if(prevproj->is_block_proj() != prevproj) { // Control-dependent edge?
            // Force a block on the control-dependent edge
            Node *g = _goto->clone(); // Force it to end in a Goto
            g->set_req(0,prevproj);
            p->set_req(i,g);
          }
        }
        nstack.push(p, i);          // 'p' is RegionNode or StartNode
      }
    } else {                        // Post-processing visited nodes
      nstack.pop();                 // remove node from stack
      // Check if it is the first node pushed on stack at the beginning.
      if (idx == 0) break;          // end of the build
      // Find predecessor basic block
      Block *pb = get_block_for_node(x);
      // Insert into nodes array, if not already there
      if (!has_block(proj)) {
        assert( x != proj, "" );
        // Map basic block of projection
        map_node_to_block(proj, pb);
        pb->push_node(proj);
      }
      // Insert self as a child of my predecessor block
      pb->_succs.map(pb->_num_succs++, get_block_for_node(np));
      assert( pb->get_node(pb->number_of_nodes() - pb->_num_succs)->is_block_proj(),
              "too many control users, not a CFG?" );
    }
  }
  // Return number of basic blocks for all children and self
  return sum;
}
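// (Example of the control-dependent case above: an IfTrue projection that
// feeds a Region merging three or more paths both splits at its source and
// merges at its destination -- a critical edge -- so a Goto block is forced
// onto it here.)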
// Inserts a goto & corresponding basic block between
// block[block_no] and its succ_no'th successor block
void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
  // get block with block_no
  assert(block_no < number_of_blocks(), "illegal block number");
  Block* in  = get_block(block_no);
  // get successor block succ_no
  assert(succ_no < in->_num_succs, "illegal successor number");
  Block* out = in->_succs[succ_no];
  // Compute frequency of the new block. Do this before inserting
  // new block in case succ_prob() needs to infer the probability from
  // surrounding blocks.
  float freq = in->_freq * in->succ_prob(succ_no);
  // get ProjNode corresponding to the succ_no'th successor of the in block
  ProjNode* proj = in->get_node(in->number_of_nodes() - in->_num_succs + succ_no)->as_Proj();
  // create region for basic block
  RegionNode* region = new (C) RegionNode(2);
  region->init_req(1, proj);
  // setup corresponding basic block
  Block* block = new (_block_arena) Block(_block_arena, region);
  map_node_to_block(region, block);
  C->regalloc()->set_bad(region->_idx);
  // add a goto node
  Node* gto = _goto->clone(); // get a new goto node
  gto->set_req(0, region);
  // add it to the basic block
  block->push_node(gto);
  map_node_to_block(gto, block);
  C->regalloc()->set_bad(gto->_idx);
  // hook up successor block
  block->_succs.map(block->_num_succs++, out);
  // remap successor's predecessors if necessary
  for (uint i = 1; i < out->num_preds(); i++) {
    if (out->pred(i) == proj) out->head()->set_req(i, gto);
  }
  // remap predecessor's successor to new block
  in->_succs.map(succ_no, block);
  // Set the frequency of the new block
  block->_freq = freq;
  // add new basic block to basic block list
  add_block_at(block_no + 1, block);
}

// Does this block end in a multiway branch that cannot have the default case
// flipped for another case?
static bool no_flip_branch(Block *b) {
  int branch_idx = b->number_of_nodes() - b->_num_succs-1;
  if (branch_idx < 1) {
    return false;
  }
  Node *branch = b->get_node(branch_idx);
  if (branch->is_Catch()) {
    return true;
  }
  if (branch->is_Mach()) {
    if (branch->is_MachNullCheck()) {
      return true;
    }
    int iop = branch->as_Mach()->ideal_Opcode();
    if (iop == Op_FastLock || iop == Op_FastUnlock) {
      return true;
    }
    // Don't flip if branch has an implicit check.
    if (branch->as_Mach()->is_TrapBasedCheckNode()) {
      return true;
    }
  }
  return false;
}
// Check for NeverBranch at block end. This needs to become a GOTO to the
// true target. NeverBranch are treated as a conditional branch that always
// goes the same direction for most of the optimizer and are used to give a
// fake exit path to infinite loops. At this late stage they need to turn
// into Goto's so that when you enter the infinite loop you indeed hang.
void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
  // Find true target
  int end_idx = b->end_idx();
  int idx = b->get_node(end_idx+1)->as_Proj()->_con;
  Block *succ = b->_succs[idx];
  Node* gto = _goto->clone(); // get a new goto node
  gto->set_req(0, b->head());
  Node *bp = b->get_node(end_idx);
  b->map_node(gto, end_idx); // Slam over NeverBranch
  map_node_to_block(gto, b);
  C->regalloc()->set_bad(gto->_idx);
  b->pop_node();              // Yank projections
  b->pop_node();              // Yank projections
  b->_succs.map(0,succ);      // Map only successor
  b->_num_succs = 1;
  // remap successor's predecessors if necessary
  uint j;
  for( j = 1; j < succ->num_preds(); j++)
    if( succ->pred(j)->in(0) == bp )
      succ->head()->set_req(j, gto);
  // Kill alternate exit path
  Block *dead = b->_succs[1-idx];
  for( j = 1; j < dead->num_preds(); j++)
    if( dead->pred(j)->in(0) == bp )
      break;
  // Scan through block, yanking dead path from
  // all regions and phis.
  dead->head()->del_req(j);
  for( int k = 1; dead->get_node(k)->is_Phi(); k++ )
    dead->get_node(k)->del_req(j);
}

// Helper function to move block bx to the slot following b_index. Return
// true if the move is successful, otherwise false
bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
  if (bx == NULL) return false;

  // Return false if bx is already scheduled.
  uint bx_index = bx->_pre_order;
  if ((bx_index <= b_index) && (get_block(bx_index) == bx)) {
    return false;
  }

  // Find the current index of block bx on the block list
  bx_index = b_index + 1;
  while (bx_index < number_of_blocks() && get_block(bx_index) != bx) {
    bx_index++;
  }
  assert(get_block(bx_index) == bx, "block not found");

  // If the previous block conditionally falls into bx, return false,
  // because moving bx will create an extra jump.
  for (uint k = 1; k < bx->num_preds(); k++) {
    Block* pred = get_block_for_node(bx->pred(k));
    if (pred == get_block(bx_index - 1)) {
      if (pred->_num_succs != 1) {
        return false;
      }
    }
  }

  // Reinsert bx just past block 'b'
  _blocks.remove(bx_index);
  _blocks.insert(b_index + 1, bx);
  return true;
}

// Move empty and uncommon blocks to the end.
void PhaseCFG::move_to_end(Block *b, uint i) {
  int e = b->is_Empty();
  if (e != Block::not_empty) {
    if (e == Block::empty_with_goto) {
      // Remove the goto, but leave the block.
      b->pop_node();
    }
    // Mark this block as a connector block, which will cause it to be
    // ignored in certain functions such as non_connector_successor().
    b->set_connector();
  }
  // Move the empty block to the end, and don't recheck.
  _blocks.remove(i);
  _blocks.push(b);
}

// Set loop alignment for every block
void PhaseCFG::set_loop_alignment() {
  uint last = number_of_blocks();
  assert(get_block(0) == get_root_block(), "");

  for (uint i = 1; i < last; i++) {
    Block* block = get_block(i);
    if (block->head()->is_Loop()) {
      block->set_loop_alignment(block);
    }
  }
}
// Make empty basic blocks to be "connector" blocks. Move uncommon blocks
// to the end.
void PhaseCFG::remove_empty_blocks() {
  // Move uncommon blocks to the end
  uint last = number_of_blocks();
  assert(get_block(0) == get_root_block(), "");

  for (uint i = 1; i < last; i++) {
    Block* block = get_block(i);
    if (block->is_connector()) {
      break;
    }

    // Check for NeverBranch at block end. This needs to become a GOTO to the
    // true target. NeverBranch are treated as a conditional branch that
    // always goes the same direction for most of the optimizer and are used
    // to give a fake exit path to infinite loops. At this late stage they
    // need to turn into Goto's so that when you enter the infinite loop you
    // indeed hang.
    if (block->get_node(block->end_idx())->Opcode() == Op_NeverBranch) {
      convert_NeverBranch_to_Goto(block);
    }

    // Look for uncommon blocks and move to end.
    if (!C->do_freq_based_layout()) {
      if (is_uncommon(block)) {
        move_to_end(block, i);
        last--; // No longer check for being uncommon!
        if (no_flip_branch(block)) { // Fall-thru case must follow?
          // Find the fall-thru block
          block = get_block(i);
          move_to_end(block, i);
          last--;
        }
        // backup block counter post-increment
        i--;
      }
    }
  }

  // Move empty blocks to the end
  last = number_of_blocks();
  for (uint i = 1; i < last; i++) {
    Block* block = get_block(i);
    if (block->is_Empty() != Block::not_empty) {
      move_to_end(block, i);
      last--;
      i--;
    }
  } // End of for all blocks
}
Block *PhaseCFG::fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext) {
  // Trap based checks must fall through to the successor with
  // PROB_ALWAYS.
  // They should be an If with 2 successors.
  assert(branch->is_MachIf(), "must be If");
  assert(block->_num_succs == 2, "must have 2 successors");

  // Get the If node and the projection for the first successor.
  MachIfNode *iff   = block->get_node(block->number_of_nodes()-3)->as_MachIf();
  ProjNode   *proj0 = block->get_node(block->number_of_nodes()-2)->as_Proj();
  ProjNode   *proj1 = block->get_node(block->number_of_nodes()-1)->as_Proj();
  ProjNode   *projt = (proj0->Opcode() == Op_IfTrue)  ? proj0 : proj1;
  ProjNode   *projf = (proj0->Opcode() == Op_IfFalse) ? proj0 : proj1;

  // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
  assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
  assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");

  ProjNode *proj_always;
  ProjNode *proj_never;
  // We must negate the branch if the implicit check doesn't follow
  // the branch's TRUE path. Then, the new TRUE branch target will
  // be the old FALSE branch target.
  if (iff->_prob <= 2*PROB_NEVER) { // There are small rounding errors.
    proj_never  = projt;
    proj_always = projf;
  } else {
    // We must negate the branch if the trap doesn't follow the
    // branch's TRUE path. Then, the new TRUE branch target will
    // be the old FALSE branch target.
    proj_never  = projf;
    proj_always = projt;
    iff->negate();
  }
  assert(iff->_prob <= 2*PROB_NEVER, "Trap based checks are expected to trap never!");
  // Map the successors properly
  block->_succs.map(0, get_block_for_node(proj_never ->raw_out(0)));  // The target of the trap.
  block->_succs.map(1, get_block_for_node(proj_always->raw_out(0))); // The fall through target.

  if (block->get_node(block->number_of_nodes() - block->_num_succs + 1) != proj_always) {
    block->map_node(proj_never,  block->number_of_nodes() - block->_num_succs + 0);
    block->map_node(proj_always, block->number_of_nodes() - block->_num_succs + 1);
  }

  // Place the fall through block after this block.
  Block *bs1 = block->non_connector_successor(1);
  if (bs1 != bnext && move_to_next(bs1, block_pos)) {
    bnext = bs1;
  }
  // If the fall through block still is not the next block, insert a goto.
  if (bs1 != bnext) {
    insert_goto_at(block_pos, 1);
  }
  return bnext;
}

// Fix up the final control flow for basic blocks.
void PhaseCFG::fixup_flow() {
  // Fixup final control flow for the blocks. Remove jump-to-next
  // block. If neither arm of an IF follows the conditional branch, we
  // have to add a second jump after the conditional. We place the
  // TRUE branch target in succs[0] for both GOTOs and IFs.
  for (uint i = 0; i < number_of_blocks(); i++) {
    Block* block = get_block(i);
    block->_pre_order = i; // turn pre-order into block-index

    // Connector blocks need no further processing.
    if (block->is_connector()) {
      assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end");
      continue;
    }
    assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors");

    Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL;
    Block* bs0 = block->non_connector_successor(0);

    // Check for multi-way branches where I cannot negate the test to
    // exchange the true and false targets.
    if (no_flip_branch(block)) {
      // Find fall through case - it must fall into its target.
      // Get the index of the branch's first successor.
      int branch_idx = block->number_of_nodes() - block->_num_succs;

      // The branch is 1 before the branch's first successor.
      Node *branch = block->get_node(branch_idx-1);

      // Handle no-flip branches which have implicit checks and which require
      // special block ordering and individual semantics of the 'fall through
      // case'.
      if ((TrapBasedNullChecks || TrapBasedRangeChecks) &&
          branch->is_Mach() && branch->as_Mach()->is_TrapBasedCheckNode()) {
        bnext = fixup_trap_based_check(branch, block, i, bnext);
      } else {
        // Else, default handling for no-flip branches
        for (uint j2 = 0; j2 < block->_num_succs; j2++) {
          const ProjNode* p = block->get_node(branch_idx + j2)->as_Proj();
          if (p->_con == 0) {
            // successor j2 is fall through case
            if (block->non_connector_successor(j2) != bnext) {
              // but it is not the next block => insert a goto
              insert_goto_at(i, j2);
            }
            // Put taken branch in slot 0
            if (j2 == 0 && block->_num_succs == 2) {
              // Flip targets in succs map
              Block *tbs0 = block->_succs[0];
              Block *tbs1 = block->_succs[1];
              block->_succs.map(0, tbs1);
              block->_succs.map(1, tbs0);
            }
            break;
          }
        }
      }

      // Remove all CatchProjs
      for (uint j = 0; j < block->_num_succs; j++) {
        block->pop_node();
      }

    } else if (block->_num_succs == 1) {
      // Block ends in a Goto?
      if (bnext == bs0) {
        // We fall into next block; remove the Goto
        block->pop_node();
      }

    } else if (block->_num_succs == 2) { // Block ends in an If?
      // Get opcode of 1st projection (matches _succs[0])
      // Note: Since this basic block has 2 exits, the last 2 nodes must
      //       be projections (in any order), the 3rd last node must be
      //       the IfNode (we have excluded other 2-way exits such as
      //       CatchNodes already).
      MachNode* iff   = block->get_node(block->number_of_nodes() - 3)->as_Mach();
      ProjNode* proj0 = block->get_node(block->number_of_nodes() - 2)->as_Proj();
      ProjNode* proj1 = block->get_node(block->number_of_nodes() - 1)->as_Proj();

      // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
      assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
      assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");

      Block* bs1 = block->non_connector_successor(1);

      // Check for neither successor block following the current
      // block ending in a conditional. If so, move one of the
      // successors after the current one, provided that the
      // successor was previously unscheduled, but moveable
      // (i.e., all paths to it involve a branch).
      if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) {
        // Choose the more common successor based on the probability
        // of the conditional branch.
        Block* bx = bs0;
        Block* by = bs1;

        // _prob is the probability of taking the true path. Make
        // p the probability of taking successor #1.
        float p = iff->as_MachIf()->_prob;
        if (proj0->Opcode() == Op_IfTrue) {
          p = 1.0 - p;
        }
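        // (Example: if _prob == 0.8 and proj0 is the IfTrue projection,
        // succs[0] is the TRUE target and p becomes 1.0 - 0.8 == 0.2, so
        // successor #0 is the more common one and is attempted first below.)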
        // Prefer successor #1 if p > 0.5
        if (p > PROB_FAIR) {
          bx = bs1;
          by = bs0;
        }

        // Attempt the more common successor first
        if (move_to_next(bx, i)) {
          bnext = bx;
        } else if (move_to_next(by, i)) {
          bnext = by;
        }
      }

      // Check for conditional branching the wrong way. Negate
      // conditional, if needed, so it falls into the following block
      // and branches to the not-following block.

      // Check for the next block being in succs[0]. We are going to branch
      // to succs[0], so we want the fall-thru case as the next block in
      // succs[1].
      if (bnext == bs0) {
        // Fall-thru case in succs[0], so flip targets in succs map
        Block* tbs0 = block->_succs[0];
        Block* tbs1 = block->_succs[1];
        block->_succs.map(0, tbs1);
        block->_succs.map(1, tbs0);
        // Flip projection for each target
        ProjNode* tmp = proj0;
        proj0 = proj1;
        proj1 = tmp;

      } else if (bnext != bs1) {
        // Need a double-branch
        // The existing conditional branch need not change.
        // Add an unconditional branch to the false target.
        // Alas, it must appear in its own block and adding a
        // block this late in the game is complicated. Sigh.
        insert_goto_at(i, 1);
      }

      // Make sure we TRUE branch to the target
      if (proj0->Opcode() == Op_IfFalse) {
        iff->as_MachIf()->negate();
      }

      block->pop_node(); // Remove IfFalse & IfTrue projections
      block->pop_node();

    } else {
      // Multi-exit block, e.g. a switch statement
      // But we don't need to do anything here
    }
  } // End of for all blocks
}


// postalloc_expand: Expand nodes after register allocation.
//
// postalloc_expand has to be called after register allocation, just
// before output (i.e. scheduling). It only gets called if
// Matcher::require_postalloc_expand is true.
//
// Background:
//
// Nodes that are expanded (one compound node requiring several
// assembler instructions to be implemented split into two or more
// non-compound nodes) after register allocation are not as nice as
// the ones expanded before register allocation - they don't
// participate in optimizations such as global code motion. But after
// register allocation we can expand nodes that use registers which
// are not spillable or registers that are not allocated, because the
// old compound node is simply replaced (in its location in the basic
// block) by a new subgraph which does not contain compound nodes any
// more. The scheduler called during output can later on process these
// non-compound nodes.
//
// Implementation:
//
// Nodes requiring postalloc expand are specified in the ad file by using
// a postalloc_expand statement instead of ins_encode. A postalloc_expand
// contains a single call to an encoding, as does an ins_encode
// statement. Instead of an emit() function a postalloc_expand() function
// is generated that doesn't emit assembler but creates a new
// subgraph. The code below calls this postalloc_expand function for each
// node with the appropriate attribute. This function returns the new
// nodes generated in an array passed in the call. The old node,
// potential MachTemps before and potential Projs after it then get
// disconnected and replaced by the new nodes. The instruction
// generating the result has to be the last one in the array. In
// general it is assumed that Projs after the node expanded are
// kills. These kills are not required any more after expanding as
// there are now explicitly visible def-use chains and the Projs are
// removed. This does not hold for calls: They do not only have
// kill-Projs but also Projs defining values. Therefore Projs after
// the node expanded are removed for all but for calls. If a node is
// to be reused, it must be added to the nodes list returned, and it
// will be added again.
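//
// For orientation, a postalloc_expand is declared in the ad file roughly
// like the hypothetical fragment below (instruct and encoding names are
// invented for illustration; real uses live in the platform .ad files).
// The statement takes the place of ins_encode and names a single encoding:
//
//   instruct loadConL_Ex(iRegLdst dst, immL src) %{
//     match(Set dst src);
//     postalloc_expand( postalloc_expand_load_long(dst, src) );
//   %}
//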
In962// general it is assumed that Projs after the node expanded are963// kills. These kills are not required any more after expanding as964// there are now explicitly visible def-use chains and the Projs are965// removed. This does not hold for calls: They do not only have966// kill-Projs but also Projs defining values. Therefore Projs after967// the node expanded are removed for all but for calls. If a node is968// to be reused, it must be added to the nodes list returned, and it969// will be added again.970//971// Implementing the postalloc_expand function for a node in an enc_class972// is rather tedious. It requires knowledge about many node details, as973// the nodes and the subgraph must be hand crafted. To simplify this,974// adlc generates some utility variables into the postalloc_expand function,975// e.g., holding the operands as specified by the postalloc_expand encoding976// specification, e.g.:977// * unsigned idx_<par_name> holding the index of the node in the ins978// * Node *n_<par_name> holding the node loaded from the ins979// * MachOpnd *op_<par_name> holding the corresponding operand980//981// The ordering of operands can not be determined by looking at a982// rule. Especially if a match rule matches several different trees,983// several nodes are generated from one instruct specification with984// different operand orderings. In this case the adlc generated985// variables are the only way to access the ins and operands986// deterministically.987//988// If assigning a register to a node that contains an oop, don't989// forget to call ra_->set_oop() for the node.990void PhaseCFG::postalloc_expand(PhaseRegAlloc* _ra) {991GrowableArray <Node *> new_nodes(32); // Array with new nodes filled by postalloc_expand function of node.992GrowableArray <Node *> remove(32);993GrowableArray <Node *> succs(32);994unsigned int max_idx = C->unique(); // Remember to distinguish new from old nodes.995DEBUG_ONLY(bool foundNode = false);996997// for all blocks998for (uint i = 0; i < number_of_blocks(); i++) {999Block *b = _blocks[i];1000// For all instructions in the current block.1001for (uint j = 0; j < b->number_of_nodes(); j++) {1002Node *n = b->get_node(j);1003if (n->is_Mach() && n->as_Mach()->requires_postalloc_expand()) {1004#ifdef ASSERT1005if (TracePostallocExpand) {1006if (!foundNode) {1007foundNode = true;1008tty->print("POSTALLOC EXPANDING %d %s\n", C->compile_id(),1009C->method() ? C->method()->name()->as_utf8() : C->stub_name());1010}1011tty->print(" postalloc expanding "); n->dump();1012if (Verbose) {1013tty->print(" with ins:\n");1014for (uint k = 0; k < n->len(); ++k) {1015if (n->in(k)) { tty->print(" "); n->in(k)->dump(); }1016}1017}1018}1019#endif1020new_nodes.clear();1021// Collect nodes that have to be removed from the block later on.1022uint req = n->req();1023remove.clear();1024for (uint k = 0; k < req; ++k) {1025if (n->in(k) && n->in(k)->is_MachTemp()) {1026remove.push(n->in(k)); // MachTemps which are inputs to the old node have to be removed.1027n->in(k)->del_req(0);1028j--;1029}1030}10311032// Check whether we can allocate enough nodes. We set a fix limit for1033// the size of postalloc expands with this.1034uint unique_limit = C->unique() + 40;1035if (unique_limit >= _ra->node_regs_max_index()) {1036Compile::current()->record_failure("out of nodes in postalloc expand");1037return;1038}10391040// Emit (i.e. 
        // Emit (i.e. generate new nodes).
        n->as_Mach()->postalloc_expand(&new_nodes, _ra);

        assert(C->unique() < unique_limit, "You allocated too many nodes in your postalloc expand.");

        // Disconnect the inputs of the old node.
        //
        // We reuse MachSpillCopy nodes. If we need to expand them, there
        // are many, so reusing pays off. If reused, the node already
        // has the new ins. n must be the last node on new_nodes list.
        if (!n->is_MachSpillCopy()) {
          for (int k = req - 1; k >= 0; --k) {
            n->del_req(k);
          }
        }

#ifdef ASSERT
        // Check that all nodes have proper operands.
        for (int k = 0; k < new_nodes.length(); ++k) {
          if (new_nodes.at(k)->_idx < max_idx || !new_nodes.at(k)->is_Mach()) continue; // old node, Proj ...
          MachNode *m = new_nodes.at(k)->as_Mach();
          for (unsigned int l = 0; l < m->num_opnds(); ++l) {
            if (MachOper::notAnOper(m->_opnds[l])) {
              outputStream *os = tty;
              os->print("Node %s ", m->Name());
              os->print("has invalid opnd %d: %p\n", l, m->_opnds[l]);
              assert(0, "Invalid operands, see inline trace in hs_err_pid file.");
            }
          }
        }
#endif

        // Collect succs of old node in remove (for projections) and in succs
        // (for all other nodes). Do _not_ collect projections in remove (but
        // in succs) in case the node is a call. We need the projections for
        // calls as they are associated with registers (i.e. they are defs).
        succs.clear();
        for (DUIterator k = n->outs(); n->has_out(k); k++) {
          if (n->out(k)->is_Proj() && !n->is_MachCall() && !n->is_MachBranch()) {
            remove.push(n->out(k));
          } else {
            succs.push(n->out(k));
          }
        }
        // Replace old node n as input of its succs by last of the new nodes.
        for (int k = 0; k < succs.length(); ++k) {
          Node *succ = succs.at(k);
          for (uint l = 0; l < succ->req(); ++l) {
            if (succ->in(l) == n) {
              succ->set_req(l, new_nodes.at(new_nodes.length() - 1));
            }
          }
          for (uint l = succ->req(); l < succ->len(); ++l) {
            if (succ->in(l) == n) {
              succ->set_prec(l, new_nodes.at(new_nodes.length() - 1));
            }
          }
        }

        // Index of old node in block.
        uint index = b->find_node(n);
        // Insert new nodes into block and map them in nodes->blocks array
        // and remember last node in n2.
        Node *n2 = NULL;
        for (int k = 0; k < new_nodes.length(); ++k) {
          n2 = new_nodes.at(k);
          b->insert_node(n2, ++index);
          map_node_to_block(n2, b);
        }

        // Add old node n to remove and remove them all from block.
        remove.push(n);
        j--;
#ifdef ASSERT
        if (TracePostallocExpand && Verbose) {
          tty->print("    removing:\n");
          for (int k = 0; k < remove.length(); ++k) {
            tty->print("      "); remove.at(k)->dump();
          }
          tty->print("    inserting:\n");
          for (int k = 0; k < new_nodes.length(); ++k) {
            tty->print("      "); new_nodes.at(k)->dump();
          }
        }
#endif
        for (int k = 0; k < remove.length(); ++k) {
          if (b->contains(remove.at(k))) {
            b->find_remove(remove.at(k));
          } else {
            assert(remove.at(k)->is_Proj() && (remove.at(k)->in(0)->is_MachBranch()), "");
          }
        }
        // If anything has been inserted (n2 != NULL), continue after last node inserted.
        // This does not always work. Some postalloc expands don't insert any nodes, if they
        // do optimizations (e.g., max(x,x)). In this case we decrement j accordingly.
        j = n2 ? b->find_node(n2) : j;
      }
    }
  }

#ifdef ASSERT
  if (foundNode) {
    tty->print("FINISHED %d %s\n", C->compile_id(),
               C->method() ? C->method()->name()->as_utf8() : C->stub_name());
    tty->flush();
  }
#endif
}


//------------------------------dump-------------------------------------------
#ifndef PRODUCT
void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
  const Node *x = end->is_block_proj();
  assert( x, "not a CFG" );

  // Do not visit this block again
  if( visited.test_set(x->_idx) ) return;

  // Skip through this block
  const Node *p = x;
  do {
    p = p->in(0); // Move control forward
    assert( !p->is_block_proj() || p->is_Root(), "not a CFG" );
  } while( !p->is_block_start() );

  // Recursively visit
  for (uint i = 1; i < p->req(); i++) {
    _dump_cfg(p->in(i), visited);
  }

  // Dump the block
  get_block_for_node(p)->dump(this);
}

void PhaseCFG::dump( ) const {
  tty->print("\n--- CFG --- %d BBs\n", number_of_blocks());
  if (_blocks.size()) { // Did we do basic-block layout?
    for (uint i = 0; i < number_of_blocks(); i++) {
      const Block* block = get_block(i);
      block->dump(this);
    }
  } else { // Else do it with a DFS
    VectorSet visited(_block_arena);
    _dump_cfg(_root,visited);
  }
}

void PhaseCFG::dump_headers() {
  for (uint i = 0; i < number_of_blocks(); i++) {
    Block* block = get_block(i);
    if (block != NULL) {
      block->dump_head(this);
    }
  }
}

void PhaseCFG::verify() const {
#ifdef ASSERT
  // Verify sane CFG
  for (uint i = 0; i < number_of_blocks(); i++) {
    Block* block = get_block(i);
    uint cnt = block->number_of_nodes();
    uint j;
    for (j = 0; j < cnt; j++) {
      Node *n = block->get_node(j);
      assert(get_block_for_node(n) == block, "");
      if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        assert(j == 1 || block->get_node(j-1)->is_Phi(), "CreateEx must be first instruction in block");
      }
      if (n->needs_anti_dependence_check()) {
        verify_anti_dependences(block, n);
      }
      for (uint k = 0; k < n->req(); k++) {
        Node *def = n->in(k);
        if (def && def != n) {
          assert(get_block_for_node(def) || def->is_Con(), "must have block; constants for debug info ok");
          // Verify that instructions in the block are in correct order.
          // Uses must follow their definition if they are in the same block.
          // Mostly done to check that MachSpillCopy nodes are placed correctly
          // when CreateEx node is moved in build_ifg_physical().
          if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) &&
              // See (+++) comment in reg_split.cpp
              !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
            bool is_loop = false;
            if (n->is_Phi()) {
              for (uint l = 1; l < def->req(); l++) {
                if (n == def->in(l)) {
                  is_loop = true;
                  break; // Some kind of loop
                }
              }
            }
            assert(is_loop || block->find_node(def) < j, "uses must follow definitions");
          }
        }
      }
    }

    j = block->end_idx();
    Node* bp = (Node*)block->get_node(block->number_of_nodes() - 1)->is_block_proj();
    assert(bp, "last instruction must be a block proj");
    assert(bp == block->get_node(j), "wrong number of successors for this block");
    if (bp->is_Catch()) {
      while (block->get_node(--j)->is_MachProj()) {
        ;
      }
      assert(block->get_node(j)->is_MachCall(), "CatchProj must follow call");
    } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
      assert(block->_num_succs == 2, "Conditional branch must have two targets");
    }
  }
#endif
}
#endif
UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
  Copy::zero_to_bytes( _indices, sizeof(uint)*max );
}

void UnionFind::extend( uint from_idx, uint to_idx ) {
  _nesting.check();
  if( from_idx >= _max ) {
    uint size = 16;
    while( size <= from_idx ) size <<=1;
    _indices = REALLOC_RESOURCE_ARRAY( uint, _indices, _max, size );
    _max = size;
  }
  while( _cnt <= from_idx ) _indices[_cnt++] = 0;
  _indices[from_idx] = to_idx;
}

void UnionFind::reset( uint max ) {
  assert( max <= max_uint, "Must fit within uint" );
  // Force the Union-Find mapping to be at least this large
  extend(max,0);
  // Initialize to be the ID mapping.
  for( uint i=0; i<max; i++ ) map(i,i);
}

// Straight out of Tarjan's union-find algorithm
uint UnionFind::Find_compress( uint idx ) {
  uint cur  = idx;
  uint next = lookup(cur);
  while( next != cur ) {        // Scan chain of equivalences
    assert( next < cur, "always union smaller" );
    cur = next;                 // until find a fixed-point
    next = lookup(cur);
  }
  // Core of union-find algorithm: update chain of
  // equivalences to be equal to the root.
  while( idx != next ) {
    uint tmp = lookup(idx);
    map(idx, next);
    idx = tmp;
  }
  return idx;
}
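// (Worked example: if the table maps 5 -> 3, 3 -> 1 and 1 -> 1, then
// Find_compress(5) walks 5 -> 3 -> 1 to find the root 1, then remaps
// both 5 and 3 straight to 1 so later lookups take a single step.)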
// Like Find above, but no path compress, so bad asymptotic behavior
uint UnionFind::Find_const( uint idx ) const {
  if( idx == 0 ) return idx;    // Ignore the zero idx
  // Off the end? This can happen during debugging dumps
  // when data structures have not finished being updated.
  if( idx >= _max ) return idx;
  uint next = lookup(idx);
  while( next != idx ) {        // Scan chain of equivalences
    idx = next;                 // until find a fixed-point
    next = lookup(idx);
  }
  return next;
}

// union 2 sets together.
void UnionFind::Union( uint idx1, uint idx2 ) {
  uint src = Find(idx1);
  uint dst = Find(idx2);
  assert( src, "" );
  assert( dst, "" );
  assert( src < _max, "oob" );
  assert( dst < _max, "oob" );
  assert( src < dst, "always union smaller" );
  map(dst,src);
}

#ifndef PRODUCT
void Trace::dump( ) const {
  tty->print_cr("Trace (freq %f)", first_block()->_freq);
  for (Block *b = first_block(); b != NULL; b = next(b)) {
    tty->print("  B%d", b->_pre_order);
    if (b->head()->is_Loop()) {
      tty->print(" (L%d)", b->compute_loop_alignment());
    }
    if (b->has_loop_alignment()) {
      tty->print(" (T%d)", b->code_alignment());
    }
  }
  tty->cr();
}

void CFGEdge::dump( ) const {
  tty->print(" B%d  -->  B%d  Freq: %f out:%3d%% in:%3d%%  State: ",
             from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct);
  switch(state()) {
  case connected:
    tty->print("connected");
    break;
  case open:
    tty->print("open");
    break;
  case interior:
    tty->print("interior");
    break;
  }
  if (infrequent()) {
    tty->print("  infrequent");
  }
  tty->cr();
}
#endif

// Comparison function for edges
static int edge_order(CFGEdge **e0, CFGEdge **e1) {
  float freq0 = (*e0)->freq();
  float freq1 = (*e1)->freq();
  if (freq0 != freq1) {
    return freq0 > freq1 ? -1 : 1;
  }

  int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo;
  int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo;

  return dist1 - dist0;
}
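// (With the qsort convention used here, a negative return orders e0 first:
// edges sort by descending frequency, and equal frequencies favor the edge
// spanning the larger RPO distance.)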
// Comparison function for traces
extern "C" int trace_frequency_order(const void *p0, const void *p1) {
  Trace *tr0 = *(Trace **) p0;
  Trace *tr1 = *(Trace **) p1;
  Block *b0 = tr0->first_block();
  Block *b1 = tr1->first_block();

  // The trace of connector blocks goes at the end;
  // we only expect one such trace
  if (b0->is_connector() != b1->is_connector()) {
    return b1->is_connector() ? -1 : 1;
  }

  // Pull more frequently executed blocks to the beginning
  float freq0 = b0->_freq;
  float freq1 = b1->_freq;
  if (freq0 != freq1) {
    return freq0 > freq1 ? -1 : 1;
  }

  int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo;

  return diff;
}

// Find edges of interest, i.e., those which can fall through. Presumes that
// edges which don't fall through are of low frequency and can be generally
// ignored. Initialize the list of traces.
void PhaseBlockLayout::find_edges() {
  // Walk the blocks, creating edges and Traces
  uint i;
  Trace *tr = NULL;
  for (i = 0; i < _cfg.number_of_blocks(); i++) {
    Block* b = _cfg.get_block(i);
    tr = new Trace(b, next, prev);
    traces[tr->id()] = tr;

    // All connector blocks should be at the end of the list
    if (b->is_connector()) break;

    // If this block and the next one have a one-to-one successor
    // predecessor relationship, simply append the next block
    int nfallthru = b->num_fall_throughs();
    while (nfallthru == 1 &&
           b->succ_fall_through(0)) {
      Block *n = b->_succs[0];

      // Skip over single-entry connector blocks, we don't want to
      // add them to the trace.
      while (n->is_connector() && n->num_preds() == 1) {
        n = n->_succs[0];
      }

      // We see a merge point, so stop search for the next block
      if (n->num_preds() != 1) break;

      i++;
      assert(n == _cfg.get_block(i), "expecting next block");
      tr->append(n);
      uf->map(n->_pre_order, tr->id());
      traces[n->_pre_order] = NULL;
      nfallthru = b->num_fall_throughs();
      b = n;
    }

    if (nfallthru > 0) {
      // Create a CFGEdge for each outgoing
      // edge that could be a fall-through.
      for (uint j = 0; j < b->_num_succs; j++ ) {
        if (b->succ_fall_through(j)) {
          Block *target = b->non_connector_successor(j);
          float freq = b->_freq * b->succ_prob(j);
          int from_pct = (int) ((100 * freq) / b->_freq);
          int to_pct = (int) ((100 * freq) / target->_freq);
          edges->append(new CFGEdge(b, target, freq, from_pct, to_pct));
        }
      }
    }
  }

  // Group connector blocks into one trace
  for (i++; i < _cfg.number_of_blocks(); i++) {
    Block *b = _cfg.get_block(i);
    assert(b->is_connector(), "connector blocks at the end");
    tr->append(b);
    uf->map(b->_pre_order, tr->id());
    traces[b->_pre_order] = NULL;
  }
}

// Union two traces together in uf, and null out the trace in the list
void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) {
  uint old_id = old_trace->id();
  uint updated_id = updated_trace->id();

  uint lo_id = updated_id;
  uint hi_id = old_id;

  // If from is greater than to, swap values to meet
  // UnionFind guarantee.
  if (updated_id > old_id) {
    lo_id = old_id;
    hi_id = updated_id;

    // Fix up the trace ids
    traces[lo_id] = traces[updated_id];
    updated_trace->set_id(lo_id);
  }

  // Union the lower with the higher and remove the pointer
  // to the higher.
  uf->Union(lo_id, hi_id);
  traces[hi_id] = NULL;
}

// Append traces together via the most frequently executed edges
void PhaseBlockLayout::grow_traces() {
  // Order the edges, and drive the growth of Traces via the most
  // frequently executed edges.
  edges->sort(edge_order);
  for (int i = 0; i < edges->length(); i++) {
    CFGEdge *e = edges->at(i);

    if (e->state() != CFGEdge::open) continue;

    Block *src_block = e->from();
    Block *targ_block = e->to();

    // Don't grow traces along backedges?
    if (!BlockLayoutRotateLoops) {
      if (targ_block->_rpo <= src_block->_rpo) {
        targ_block->set_loop_alignment(targ_block);
        continue;
      }
    }

    Trace *src_trace = trace(src_block);
    Trace *targ_trace = trace(targ_block);

    // If the edge in question can join two traces at their ends,
    // append one trace to the other.
    if (src_trace->last_block() == src_block) {
      if (src_trace == targ_trace) {
        e->set_state(CFGEdge::interior);
        if (targ_trace->backedge(e)) {
          // Reset i to catch any newly eligible edge
          // (Or we could remember the first "open" edge, and reset there)
          i = 0;
        }
      } else if (targ_trace->first_block() == targ_block) {
        e->set_state(CFGEdge::connected);
        src_trace->append(targ_trace);
        union_traces(src_trace, targ_trace);
      }
    }
  }
}

// Embed one trace into another, if the fork or join points are sufficiently
// balanced.
void PhaseBlockLayout::merge_traces(bool fall_thru_only) {
  // Walk the edge list another time, looking at unprocessed edges.
  // Fold in diamonds
  for (int i = 0; i < edges->length(); i++) {
    CFGEdge *e = edges->at(i);

    if (e->state() != CFGEdge::open) continue;
    if (fall_thru_only) {
      if (e->infrequent()) continue;
    }

    Block *src_block = e->from();
    Trace *src_trace = trace(src_block);
    bool src_at_tail = src_trace->last_block() == src_block;

    Block *targ_block = e->to();
    Trace *targ_trace = trace(targ_block);
    bool targ_at_start = targ_trace->first_block() == targ_block;

    if (src_trace == targ_trace) {
      // This may be a loop, but we can't do much about it.
      e->set_state(CFGEdge::interior);
      continue;
    }

    if (fall_thru_only) {
      // If the edge links the middle of two traces, we can't do anything.
      // Mark the edge and continue.
      if (!src_at_tail & !targ_at_start) {
        continue;
      }

      // Don't grow traces along backedges?
      if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) {
        continue;
      }

      // If both ends of the edge are available, why didn't we handle it earlier?
      assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier.");

      if (targ_at_start) {
        // Insert the "targ" trace in the "src" trace if the insertion point
        // is a two way branch.
        // Better profitability check possible, but may not be worth it.
        // Someday, see if this "fork" has an associated "join";
        // then make a policy on merging this trace at the fork or join.
        // For example, other things being equal, it may be better to place this
        // trace at the join point if the "src" trace ends in a two-way, but
        // the insertion point is one-way.
        assert(src_block->num_fall_throughs() == 2, "unexpected diamond");
        e->set_state(CFGEdge::connected);
        src_trace->insert_after(src_block, targ_trace);
        union_traces(src_trace, targ_trace);
      } else if (src_at_tail) {
        if (src_trace != trace(_cfg.get_root_block())) {
          e->set_state(CFGEdge::connected);
          targ_trace->insert_before(targ_block, src_trace);
          union_traces(targ_trace, src_trace);
        }
      }
    } else if (e->state() == CFGEdge::open) {
      // Append traces, even without a fall-thru connection.
      // But leave root entry at the beginning of the block list.
      if (targ_trace != trace(_cfg.get_root_block())) {
        e->set_state(CFGEdge::connected);
        src_trace->append(targ_trace);
        union_traces(src_trace, targ_trace);
      }
    }
  }
}
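// (The "diamond" folded above: a two-way fork whose arms rejoin later;
// inserting the target trace right after the fork block keeps one arm as a
// fall-through instead of leaving both arms as taken branches.)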
// Order the sequence of the traces in some desirable way, and fixup the
// jumps at the end of each block.
void PhaseBlockLayout::reorder_traces(int count) {
  ResourceArea *area = Thread::current()->resource_area();
  Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
  Block_List worklist;
  int new_count = 0;

  // Compact the traces.
  for (int i = 0; i < count; i++) {
    Trace *tr = traces[i];
    if (tr != NULL) {
      new_traces[new_count++] = tr;
    }
  }

  // The entry block should be first on the new trace list.
  Trace *tr = trace(_cfg.get_root_block());
  assert(tr == new_traces[0], "entry trace misplaced");

  // Sort the new trace list by frequency
  qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);

  // Patch up the successor blocks
  _cfg.clear_blocks();
  for (int i = 0; i < new_count; i++) {
    Trace *tr = new_traces[i];
    if (tr != NULL) {
      tr->fixup_blocks(_cfg);
    }
  }
}

// Order basic blocks based on frequency
PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg)
: Phase(BlockLayout)
, _cfg(cfg) {
  ResourceMark rm;
  ResourceArea *area = Thread::current()->resource_area();

  // List of traces
  int size = _cfg.number_of_blocks() + 1;
  traces = NEW_ARENA_ARRAY(area, Trace *, size);
  memset(traces, 0, size*sizeof(Trace*));
  next = NEW_ARENA_ARRAY(area, Block *, size);
  memset(next, 0, size*sizeof(Block *));
  prev = NEW_ARENA_ARRAY(area, Block *, size);
  memset(prev, 0, size*sizeof(Block *));

  // List of edges
  edges = new GrowableArray<CFGEdge*>;

  // Mapping block index --> block_trace
  uf = new UnionFind(size);
  uf->reset(size);

  // Find edges and create traces.
  find_edges();

  // Grow traces at their ends via most frequent edges.
  grow_traces();

  // Merge one trace into another, but only at fall-through points.
  // This may make diamonds and other related shapes in a trace.
  merge_traces(true);

  // Run merge again, allowing two traces to be catenated, even if
  // one does not fall through into the other. This appends loosely
  // related traces to be near each other.
  merge_traces(false);

  // Re-order all the remaining traces by frequency
  reorder_traces(size);

  assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink");
}
If the target block is head of the1686// loop, rotate the loop block so that the loop ends in a conditional branch.1687bool Trace::backedge(CFGEdge *e) {1688bool loop_rotated = false;1689Block *src_block = e->from();1690Block *targ_block = e->to();16911692assert(last_block() == src_block, "loop discovery at back branch");1693if (first_block() == targ_block) {1694if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) {1695// Find the last block in the trace that has a conditional1696// branch.1697Block *b;1698for (b = last_block(); b != NULL; b = prev(b)) {1699if (b->num_fall_throughs() == 2) {1700break;1701}1702}17031704if (b != last_block() && b != NULL) {1705loop_rotated = true;17061707// Rotate the loop by doing two-part linked-list surgery.1708append(first_block());1709break_loop_after(b);1710}1711}17121713// Backbranch to the top of a trace1714// Scroll forward through the trace from the targ_block. If we find1715// a loop head before another loop top, use the the loop head alignment.1716for (Block *b = targ_block; b != NULL; b = next(b)) {1717if (b->has_loop_alignment()) {1718break;1719}1720if (b->head()->is_Loop()) {1721targ_block = b;1722break;1723}1724}17251726first_block()->set_loop_alignment(targ_block);17271728} else {1729// Backbranch into the middle of a trace1730targ_block->set_loop_alignment(targ_block);1731}17321733return loop_rotated;1734}17351736// push blocks onto the CFG list1737// ensure that blocks have the correct two-way branch sense1738void Trace::fixup_blocks(PhaseCFG &cfg) {1739Block *last = last_block();1740for (Block *b = first_block(); b != NULL; b = next(b)) {1741cfg.add_block(b);1742if (!b->is_connector()) {1743int nfallthru = b->num_fall_throughs();1744if (b != last) {1745if (nfallthru == 2) {1746// Ensure that the sense of the branch is correct1747Block *bnext = next(b);1748Block *bs0 = b->non_connector_successor(0);17491750MachNode *iff = b->get_node(b->number_of_nodes() - 3)->as_Mach();1751ProjNode *proj0 = b->get_node(b->number_of_nodes() - 2)->as_Proj();1752ProjNode *proj1 = b->get_node(b->number_of_nodes() - 1)->as_Proj();17531754if (bnext == bs0) {1755// Fall-thru case in succs[0], should be in succs[1]17561757// Flip targets in _succs map1758Block *tbs0 = b->_succs[0];1759Block *tbs1 = b->_succs[1];1760b->_succs.map( 0, tbs1 );1761b->_succs.map( 1, tbs0 );17621763// Flip projections to match targets1764b->map_node(proj1, b->number_of_nodes() - 2);1765b->map_node(proj0, b->number_of_nodes() - 1);1766}1767}1768}1769}1770}1771}177217731774