Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/opto/chaitin.cpp
32285 views
/*1* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "compiler/compileLog.hpp"26#include "compiler/oopMap.hpp"27#include "memory/allocation.inline.hpp"28#include "opto/addnode.hpp"29#include "opto/block.hpp"30#include "opto/callnode.hpp"31#include "opto/cfgnode.hpp"32#include "opto/chaitin.hpp"33#include "opto/coalesce.hpp"34#include "opto/connode.hpp"35#include "opto/idealGraphPrinter.hpp"36#include "opto/indexSet.hpp"37#include "opto/machnode.hpp"38#include "opto/memnode.hpp"39#include "opto/opcodes.hpp"40#include "opto/rootnode.hpp"4142#ifndef PRODUCT43void LRG::dump() const {44ttyLocker ttyl;45tty->print("%d ",num_regs());46_mask.dump();47if( _msize_valid ) {48if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);49else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());50} else {51tty->print(", #?(%d) ",_mask.Size());52}5354tty->print("EffDeg: ");55if( _degree_valid ) tty->print( "%d ", _eff_degree );56else tty->print("? ");5758if( is_multidef() ) {59tty->print("MultiDef ");60if (_defs != NULL) {61tty->print("(");62for (int i = 0; i < _defs->length(); i++) {63tty->print("N%d ", _defs->at(i)->_idx);64}65tty->print(") ");66}67}68else if( _def == 0 ) tty->print("Dead ");69else tty->print("Def: N%d ",_def->_idx);7071tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());72// Flags73if( _is_oop ) tty->print("Oop ");74if( _is_float ) tty->print("Float ");75if( _is_vector ) tty->print("Vector ");76if( _was_spilled1 ) tty->print("Spilled ");77if( _was_spilled2 ) tty->print("Spilled2 ");78if( _direct_conflict ) tty->print("Direct_conflict ");79if( _fat_proj ) tty->print("Fat ");80if( _was_lo ) tty->print("Lo ");81if( _has_copy ) tty->print("Copy ");82if( _at_risk ) tty->print("Risk ");8384if( _must_spill ) tty->print("Must_spill ");85if( _is_bound ) tty->print("Bound ");86if( _msize_valid ) {87if( _degree_valid && lo_degree() ) tty->print("Trivial ");88}8990tty->cr();91}92#endif9394// Compute score from cost and area. Low score is best to spill.95static double raw_score( double cost, double area ) {96return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;97}9899double LRG::score() const {100// Scale _area by RegisterCostAreaRatio/64K then subtract from cost.101// Bigger area lowers score, encourages spilling this live range.102// Bigger cost raise score, prevents spilling this live range.103// (Note: 1/65536 is the magic constant below; I dont trust the C optimizer104// to turn a divide by a constant into a multiply by the reciprical).105double score = raw_score( _cost, _area);106107// Account for area. Basically, LRGs covering large areas are better108// to spill because more other LRGs get freed up.109if( _area == 0.0 ) // No area? Then no progress to spill110return 1e35;111112if( _was_spilled2 ) // If spilled once before, we are unlikely113return score + 1e30; // to make progress again.114115if( _cost >= _area*3.0 ) // Tiny area relative to cost116return score + 1e17; // Probably no progress to spill117118if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost119return score + 1e10; // Likely no progress to spill120121return score;122}123124#define NUMBUCKS 3125126// Straight out of Tarjan's union-find algorithm127uint LiveRangeMap::find_compress(uint lrg) {128uint cur = lrg;129uint next = _uf_map.at(cur);130while (next != cur) { // Scan chain of equivalences131assert( next < cur, "always union smaller");132cur = next; // until find a fixed-point133next = _uf_map.at(cur);134}135136// Core of union-find algorithm: update chain of137// equivalences to be equal to the root.138while (lrg != next) {139uint tmp = _uf_map.at(lrg);140_uf_map.at_put(lrg, next);141lrg = tmp;142}143return lrg;144}145146// Reset the Union-Find map to identity147void LiveRangeMap::reset_uf_map(uint max_lrg_id) {148_max_lrg_id= max_lrg_id;149// Force the Union-Find mapping to be at least this large150_uf_map.at_put_grow(_max_lrg_id, 0);151// Initialize it to be the ID mapping.152for (uint i = 0; i < _max_lrg_id; ++i) {153_uf_map.at_put(i, i);154}155}156157// Make all Nodes map directly to their final live range; no need for158// the Union-Find mapping after this call.159void LiveRangeMap::compress_uf_map_for_nodes() {160// For all Nodes, compress mapping161uint unique = _names.length();162for (uint i = 0; i < unique; ++i) {163uint lrg = _names.at(i);164uint compressed_lrg = find(lrg);165if (lrg != compressed_lrg) {166_names.at_put(i, compressed_lrg);167}168}169}170171// Like Find above, but no path compress, so bad asymptotic behavior172uint LiveRangeMap::find_const(uint lrg) const {173if (!lrg) {174return lrg; // Ignore the zero LRG175}176177// Off the end? This happens during debugging dumps when you got178// brand new live ranges but have not told the allocator yet.179if (lrg >= _max_lrg_id) {180return lrg;181}182183uint next = _uf_map.at(lrg);184while (next != lrg) { // Scan chain of equivalences185assert(next < lrg, "always union smaller");186lrg = next; // until find a fixed-point187next = _uf_map.at(lrg);188}189return next;190}191192PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)193: PhaseRegAlloc(unique, cfg, matcher,194#ifndef PRODUCT195print_chaitin_statistics196#else197NULL198#endif199)200, _lrg_map(Thread::current()->resource_area(), unique)201, _live(0)202, _spilled_once(Thread::current()->resource_area())203, _spilled_twice(Thread::current()->resource_area())204, _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)205, _oldphi(unique)206#ifndef PRODUCT207, _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))208#endif209{210NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )211212_high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());213214// Build a list of basic blocks, sorted by frequency215_blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());216// Experiment with sorting strategies to speed compilation217double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket218Block **buckets[NUMBUCKS]; // Array of buckets219uint buckcnt[NUMBUCKS]; // Array of bucket counters220double buckval[NUMBUCKS]; // Array of bucket value cutoffs221for (uint i = 0; i < NUMBUCKS; i++) {222buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());223buckcnt[i] = 0;224// Bump by three orders of magnitude each time225cutoff *= 0.001;226buckval[i] = cutoff;227for (uint j = 0; j < _cfg.number_of_blocks(); j++) {228buckets[i][j] = NULL;229}230}231// Sort blocks into buckets232for (uint i = 0; i < _cfg.number_of_blocks(); i++) {233for (uint j = 0; j < NUMBUCKS; j++) {234if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {235// Assign block to end of list for appropriate bucket236buckets[j][buckcnt[j]++] = _cfg.get_block(i);237break; // kick out of inner loop238}239}240}241// Dump buckets into final block array242uint blkcnt = 0;243for (uint i = 0; i < NUMBUCKS; i++) {244for (uint j = 0; j < buckcnt[i]; j++) {245_blks[blkcnt++] = buckets[i][j];246}247}248249assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");250}251252// union 2 sets together.253void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {254uint src = _lrg_map.find(src_n);255uint dst = _lrg_map.find(dst_n);256assert(src, "");257assert(dst, "");258assert(src < _lrg_map.max_lrg_id(), "oob");259assert(dst < _lrg_map.max_lrg_id(), "oob");260assert(src < dst, "always union smaller");261_lrg_map.uf_map(dst, src);262}263264void PhaseChaitin::new_lrg(const Node *x, uint lrg) {265// Make the Node->LRG mapping266_lrg_map.extend(x->_idx,lrg);267// Make the Union-Find mapping an identity function268_lrg_map.uf_extend(lrg, lrg);269}270271272int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {273assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");274DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )275int found_projs = 0;276uint cnt = orig->outcnt();277for (uint i = 0; i < cnt; i++) {278Node* proj = orig->raw_out(i);279if (proj->is_MachProj()) {280assert(proj->outcnt() == 0, "only kill projections are expected here");281assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");282found_projs++;283// Copy kill projections after the cloned node284Node* kills = proj->clone();285kills->set_req(0, copy);286b->insert_node(kills, idx++);287_cfg.map_node_to_block(kills, b);288new_lrg(kills, max_lrg_id++);289}290}291return found_projs;292}293294// Renumber the live ranges to compact them. Makes the IFG smaller.295void PhaseChaitin::compact() {296// Current the _uf_map contains a series of short chains which are headed297// by a self-cycle. All the chains run from big numbers to little numbers.298// The Find() call chases the chains & shortens them for the next Find call.299// We are going to change this structure slightly. Numbers above a moving300// wave 'i' are unchanged. Numbers below 'j' point directly to their301// compacted live range with no further chaining. There are no chains or302// cycles below 'i', so the Find call no longer works.303uint j=1;304uint i;305for (i = 1; i < _lrg_map.max_lrg_id(); i++) {306uint lr = _lrg_map.uf_live_range_id(i);307// Ignore unallocated live ranges308if (!lr) {309continue;310}311assert(lr <= i, "");312_lrg_map.uf_map(i, ( lr == i ) ? j++ : _lrg_map.uf_live_range_id(lr));313}314// Now change the Node->LR mapping to reflect the compacted names315uint unique = _lrg_map.size();316for (i = 0; i < unique; i++) {317uint lrg_id = _lrg_map.live_range_id(i);318_lrg_map.map(i, _lrg_map.uf_live_range_id(lrg_id));319}320321// Reset the Union-Find mapping322_lrg_map.reset_uf_map(j);323}324325void PhaseChaitin::Register_Allocate() {326327// Above the OLD FP (and in registers) are the incoming arguments. Stack328// slots in this area are called "arg_slots". Above the NEW FP (and in329// registers) is the outgoing argument area; above that is the spill/temp330// area. These are all "frame_slots". Arg_slots start at the zero331// stack_slots and count up to the known arg_size. Frame_slots start at332// the stack_slot #arg_size and go up. After allocation I map stack333// slots to actual offsets. Stack-slots in the arg_slot area are biased334// by the frame_size; stack-slots in the frame_slot area are biased by 0.335336_trip_cnt = 0;337_alternate = 0;338_matcher._allocation_started = true;339340ResourceArea split_arena(mtCompiler); // Arena for Split local resources341ResourceArea live_arena(mtCompiler); // Arena for liveness & IFG info342ResourceMark rm(&live_arena);343344// Need live-ness for the IFG; need the IFG for coalescing. If the345// liveness is JUST for coalescing, then I can get some mileage by renaming346// all copy-related live ranges low and then using the max copy-related347// live range as a cut-off for LIVE and the IFG. In other words, I can348// build a subset of LIVE and IFG just for copies.349PhaseLive live(_cfg, _lrg_map.names(), &live_arena);350351// Need IFG for coalescing and coloring352PhaseIFG ifg(&live_arena);353_ifg = &ifg;354355// Come out of SSA world to the Named world. Assign (virtual) registers to356// Nodes. Use the same register for all inputs and the output of PhiNodes357// - effectively ending SSA form. This requires either coalescing live358// ranges or inserting copies. For the moment, we insert "virtual copies"359// - we pretend there is a copy prior to each Phi in predecessor blocks.360// We will attempt to coalesce such "virtual copies" before we manifest361// them for real.362de_ssa();363364#ifdef ASSERT365// Veify the graph before RA.366verify(&live_arena);367#endif368369{370NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )371_live = NULL; // Mark live as being not available372rm.reset_to_mark(); // Reclaim working storage373IndexSet::reset_memory(C, &live_arena);374ifg.init(_lrg_map.max_lrg_id()); // Empty IFG375gather_lrg_masks( false ); // Collect LRG masks376live.compute(_lrg_map.max_lrg_id()); // Compute liveness377_live = &live; // Mark LIVE as being available378}379380// Base pointers are currently "used" by instructions which define new381// derived pointers. This makes base pointers live up to the where the382// derived pointer is made, but not beyond. Really, they need to be live383// across any GC point where the derived value is live. So this code looks384// at all the GC points, and "stretches" the live range of any base pointer385// to the GC point.386if (stretch_base_pointer_live_ranges(&live_arena)) {387NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)388// Since some live range stretched, I need to recompute live389_live = NULL;390rm.reset_to_mark(); // Reclaim working storage391IndexSet::reset_memory(C, &live_arena);392ifg.init(_lrg_map.max_lrg_id());393gather_lrg_masks(false);394live.compute(_lrg_map.max_lrg_id());395_live = &live;396}397// Create the interference graph using virtual copies398build_ifg_virtual(); // Include stack slots this time399400// Aggressive (but pessimistic) copy coalescing.401// This pass works on virtual copies. Any virtual copies which are not402// coalesced get manifested as actual copies403{404// The IFG is/was triangular. I am 'squaring it up' so Union can run405// faster. Union requires a 'for all' operation which is slow on the406// triangular adjacency matrix (quick reminder: the IFG is 'sparse' -407// meaning I can visit all the Nodes neighbors less than a Node in time408// O(# of neighbors), but I have to visit all the Nodes greater than a409// given Node and search them for an instance, i.e., time O(#MaxLRG)).410_ifg->SquareUp();411412PhaseAggressiveCoalesce coalesce(*this);413coalesce.coalesce_driver();414// Insert un-coalesced copies. Visit all Phis. Where inputs to a Phi do415// not match the Phi itself, insert a copy.416coalesce.insert_copies(_matcher);417if (C->failing()) {418return;419}420}421422// After aggressive coalesce, attempt a first cut at coloring.423// To color, we need the IFG and for that we need LIVE.424{425NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )426_live = NULL;427rm.reset_to_mark(); // Reclaim working storage428IndexSet::reset_memory(C, &live_arena);429ifg.init(_lrg_map.max_lrg_id());430gather_lrg_masks( true );431live.compute(_lrg_map.max_lrg_id());432_live = &live;433}434435// Build physical interference graph436uint must_spill = 0;437must_spill = build_ifg_physical(&live_arena);438// If we have a guaranteed spill, might as well spill now439if (must_spill) {440if(!_lrg_map.max_lrg_id()) {441return;442}443// Bail out if unique gets too large (ie - unique > MaxNodeLimit)444C->check_node_count(10*must_spill, "out of nodes before split");445if (C->failing()) {446return;447}448449uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere450_lrg_map.set_max_lrg_id(new_max_lrg_id);451// Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)452// or we failed to split453C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");454if (C->failing()) {455return;456}457458NOT_PRODUCT(C->verify_graph_edges();)459460compact(); // Compact LRGs; return new lower max lrg461462{463NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )464_live = NULL;465rm.reset_to_mark(); // Reclaim working storage466IndexSet::reset_memory(C, &live_arena);467ifg.init(_lrg_map.max_lrg_id()); // Build a new interference graph468gather_lrg_masks( true ); // Collect intersect mask469live.compute(_lrg_map.max_lrg_id()); // Compute LIVE470_live = &live;471}472build_ifg_physical(&live_arena);473_ifg->SquareUp();474_ifg->Compute_Effective_Degree();475// Only do conservative coalescing if requested476if (OptoCoalesce) {477// Conservative (and pessimistic) copy coalescing of those spills478PhaseConservativeCoalesce coalesce(*this);479// If max live ranges greater than cutoff, don't color the stack.480// This cutoff can be larger than below since it is only done once.481coalesce.coalesce_driver();482}483_lrg_map.compress_uf_map_for_nodes();484485#ifdef ASSERT486verify(&live_arena, true);487#endif488} else {489ifg.SquareUp();490ifg.Compute_Effective_Degree();491#ifdef ASSERT492set_was_low();493#endif494}495496// Prepare for Simplify & Select497cache_lrg_info(); // Count degree of LRGs498499// Simplify the InterFerence Graph by removing LRGs of low degree.500// LRGs of low degree are trivially colorable.501Simplify();502503// Select colors by re-inserting LRGs back into the IFG in reverse order.504// Return whether or not something spills.505uint spills = Select( );506507// If we spill, split and recycle the entire thing508while( spills ) {509if( _trip_cnt++ > 24 ) {510DEBUG_ONLY( dump_for_spill_split_recycle(); )511if( _trip_cnt > 27 ) {512C->record_method_not_compilable("failed spill-split-recycle sanity check");513return;514}515}516517if (!_lrg_map.max_lrg_id()) {518return;519}520uint new_max_lrg_id = Split(_lrg_map.max_lrg_id(), &split_arena); // Split spilling LRG everywhere521_lrg_map.set_max_lrg_id(new_max_lrg_id);522// Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)523C->check_node_count(2 * NodeLimitFudgeFactor, "out of nodes after split");524if (C->failing()) {525return;526}527528compact(); // Compact LRGs; return new lower max lrg529530// Nuke the live-ness and interference graph and LiveRanGe info531{532NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )533_live = NULL;534rm.reset_to_mark(); // Reclaim working storage535IndexSet::reset_memory(C, &live_arena);536ifg.init(_lrg_map.max_lrg_id());537538// Create LiveRanGe array.539// Intersect register masks for all USEs and DEFs540gather_lrg_masks(true);541live.compute(_lrg_map.max_lrg_id());542_live = &live;543}544must_spill = build_ifg_physical(&live_arena);545_ifg->SquareUp();546_ifg->Compute_Effective_Degree();547548// Only do conservative coalescing if requested549if (OptoCoalesce) {550// Conservative (and pessimistic) copy coalescing551PhaseConservativeCoalesce coalesce(*this);552// Check for few live ranges determines how aggressive coalesce is.553coalesce.coalesce_driver();554}555_lrg_map.compress_uf_map_for_nodes();556#ifdef ASSERT557verify(&live_arena, true);558#endif559cache_lrg_info(); // Count degree of LRGs560561// Simplify the InterFerence Graph by removing LRGs of low degree.562// LRGs of low degree are trivially colorable.563Simplify();564565// Select colors by re-inserting LRGs back into the IFG in reverse order.566// Return whether or not something spills.567spills = Select();568}569570// Count number of Simplify-Select trips per coloring success.571_allocator_attempts += _trip_cnt + 1;572_allocator_successes += 1;573574// Peephole remove copies575post_allocate_copy_removal();576577// Merge multidefs if multiple defs representing the same value are used in a single block.578merge_multidefs();579580#ifdef ASSERT581// Veify the graph after RA.582verify(&live_arena);583#endif584585// max_reg is past the largest *register* used.586// Convert that to a frame_slot number.587if (_max_reg <= _matcher._new_SP) {588_framesize = C->out_preserve_stack_slots();589}590else {591_framesize = _max_reg -_matcher._new_SP;592}593assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");594595// This frame must preserve the required fp alignment596_framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());597assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );598#ifndef PRODUCT599_total_framesize += _framesize;600if ((int)_framesize > _max_framesize) {601_max_framesize = _framesize;602}603#endif604605// Convert CISC spills606fixup_spills();607608// Log regalloc results609CompileLog* log = Compile::current()->log();610if (log != NULL) {611log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());612}613614if (C->failing()) {615return;616}617618NOT_PRODUCT(C->verify_graph_edges();)619620// Move important info out of the live_arena to longer lasting storage.621alloc_node_regs(_lrg_map.size());622for (uint i=0; i < _lrg_map.size(); i++) {623if (_lrg_map.live_range_id(i)) { // Live range associated with Node?624LRG &lrg = lrgs(_lrg_map.live_range_id(i));625if (!lrg.alive()) {626set_bad(i);627} else if (lrg.num_regs() == 1) {628set1(i, lrg.reg());629} else { // Must be a register-set630if (!lrg._fat_proj) { // Must be aligned adjacent register set631// Live ranges record the highest register in their mask.632// We want the low register for the AD file writer's convenience.633OptoReg::Name hi = lrg.reg(); // Get hi register634OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo635// We have to use pair [lo,lo+1] even for wide vectors because636// the rest of code generation works only with pairs. It is safe637// since for registers encoding only 'lo' is used.638// Second reg from pair is used in ScheduleAndBundle on SPARC where639// vector max size is 8 which corresponds to registers pair.640// It is also used in BuildOopMaps but oop operations are not641// vectorized.642set2(i, lo);643} else { // Misaligned; extract 2 bits644OptoReg::Name hi = lrg.reg(); // Get hi register645lrg.Remove(hi); // Yank from mask646int lo = lrg.mask().find_first_elem(); // Find lo647set_pair(i, hi, lo);648}649}650if( lrg._is_oop ) _node_oops.set(i);651} else {652set_bad(i);653}654}655656// Done!657_live = NULL;658_ifg = NULL;659C->set_indexSet_arena(NULL); // ResourceArea is at end of scope660}661662void PhaseChaitin::de_ssa() {663// Set initial Names for all Nodes. Most Nodes get the virtual register664// number. A few get the ZERO live range number. These do not665// get allocated, but instead rely on correct scheduling to ensure that666// only one instance is simultaneously live at a time.667uint lr_counter = 1;668for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {669Block* block = _cfg.get_block(i);670uint cnt = block->number_of_nodes();671672// Handle all the normal Nodes in the block673for( uint j = 0; j < cnt; j++ ) {674Node *n = block->get_node(j);675// Pre-color to the zero live range, or pick virtual register676const RegMask &rm = n->out_RegMask();677_lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);678}679}680681// Reset the Union-Find mapping to be identity682_lrg_map.reset_uf_map(lr_counter);683}684685686// Gather LiveRanGe information, including register masks. Modification of687// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.688void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {689690// Nail down the frame pointer live range691uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));692lrgs(fp_lrg)._cost += 1e12; // Cost is infinite693694// For all blocks695for (uint i = 0; i < _cfg.number_of_blocks(); i++) {696Block* block = _cfg.get_block(i);697698// For all instructions699for (uint j = 1; j < block->number_of_nodes(); j++) {700Node* n = block->get_node(j);701uint input_edge_start =1; // Skip control most nodes702if (n->is_Mach()) {703input_edge_start = n->as_Mach()->oper_input_base();704}705uint idx = n->is_Copy();706707// Get virtual register number, same as LiveRanGe index708uint vreg = _lrg_map.live_range_id(n);709LRG& lrg = lrgs(vreg);710if (vreg) { // No vreg means un-allocable (e.g. memory)711712// Collect has-copy bit713if (idx) {714lrg._has_copy = 1;715uint clidx = _lrg_map.live_range_id(n->in(idx));716LRG& copy_src = lrgs(clidx);717copy_src._has_copy = 1;718}719720// Check for float-vs-int live range (used in register-pressure721// calculations)722const Type *n_type = n->bottom_type();723if (n_type->is_floatingpoint()) {724lrg._is_float = 1;725}726727// Check for twice prior spilling. Once prior spilling might have728// spilled 'soft', 2nd prior spill should have spilled 'hard' and729// further spilling is unlikely to make progress.730if (_spilled_once.test(n->_idx)) {731lrg._was_spilled1 = 1;732if (_spilled_twice.test(n->_idx)) {733lrg._was_spilled2 = 1;734}735}736737#ifndef PRODUCT738if (trace_spilling() && lrg._def != NULL) {739// collect defs for MultiDef printing740if (lrg._defs == NULL) {741lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL);742lrg._defs->append(lrg._def);743}744lrg._defs->append(n);745}746#endif747748// Check for a single def LRG; these can spill nicely749// via rematerialization. Flag as NULL for no def found750// yet, or 'n' for single def or -1 for many defs.751lrg._def = lrg._def ? NodeSentinel : n;752753// Limit result register mask to acceptable registers754const RegMask &rm = n->out_RegMask();755lrg.AND( rm );756757uint ireg = n->ideal_reg();758assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,759"oops must be in Op_RegP's" );760761// Check for vector live range (only if vector register is used).762// On SPARC vector uses RegD which could be misaligned so it is not763// processes as vector in RA.764if (RegMask::is_vector(ireg))765lrg._is_vector = 1;766assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,767"vector must be in vector registers");768769// Check for bound register masks770const RegMask &lrgmask = lrg.mask();771if (lrgmask.is_bound(ireg)) {772lrg._is_bound = 1;773}774775// Check for maximum frequency value776if (lrg._maxfreq < block->_freq) {777lrg._maxfreq = block->_freq;778}779780// Check for oop-iness, or long/double781// Check for multi-kill projection782switch (ireg) {783case MachProjNode::fat_proj:784// Fat projections have size equal to number of registers killed785lrg.set_num_regs(rm.Size());786lrg.set_reg_pressure(lrg.num_regs());787lrg._fat_proj = 1;788lrg._is_bound = 1;789break;790case Op_RegP:791#ifdef _LP64792lrg.set_num_regs(2); // Size is 2 stack words793#else794lrg.set_num_regs(1); // Size is 1 stack word795#endif796// Register pressure is tracked relative to the maximum values797// suggested for that platform, INTPRESSURE and FLOATPRESSURE,798// and relative to other types which compete for the same regs.799//800// The following table contains suggested values based on the801// architectures as defined in each .ad file.802// INTPRESSURE and FLOATPRESSURE may be tuned differently for803// compile-speed or performance.804// Note1:805// SPARC and SPARCV9 reg_pressures are at 2 instead of 1806// since .ad registers are defined as high and low halves.807// These reg_pressure values remain compatible with the code808// in is_high_pressure() which relates get_invalid_mask_size(),809// Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.810// Note2:811// SPARC -d32 has 24 registers available for integral values,812// but only 10 of these are safe for 64-bit longs.813// Using set_reg_pressure(2) for both int and long means814// the allocator will believe it can fit 26 longs into815// registers. Using 2 for longs and 1 for ints means the816// allocator will attempt to put 52 integers into registers.817// The settings below limit this problem to methods with818// many long values which are being run on 32-bit SPARC.819//820// ------------------- reg_pressure --------------------821// Each entry is reg_pressure_per_value,number_of_regs822// RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE823// IA32 2 1 1 1 1 6 6824// IA64 1 1 1 1 1 50 41825// SPARC 2 2 2 2 2 48 (24) 52 (26)826// SPARCV9 2 2 2 2 2 48 (24) 52 (26)827// AMD64 1 1 1 1 1 14 15828// -----------------------------------------------------829#if defined(SPARC)830lrg.set_reg_pressure(2); // use for v9 as well831#else832lrg.set_reg_pressure(1); // normally one value per register833#endif834if( n_type->isa_oop_ptr() ) {835lrg._is_oop = 1;836}837break;838case Op_RegL: // Check for long or double839case Op_RegD:840lrg.set_num_regs(2);841// Define platform specific register pressure842#if defined(SPARC) || defined(ARM32)843lrg.set_reg_pressure(2);844#elif defined(IA32)845if( ireg == Op_RegL ) {846lrg.set_reg_pressure(2);847} else {848lrg.set_reg_pressure(1);849}850#else851lrg.set_reg_pressure(1); // normally one value per register852#endif853// If this def of a double forces a mis-aligned double,854// flag as '_fat_proj' - really flag as allowing misalignment855// AND changes how we count interferences. A mis-aligned856// double can interfere with TWO aligned pairs, or effectively857// FOUR registers!858if (rm.is_misaligned_pair()) {859lrg._fat_proj = 1;860lrg._is_bound = 1;861}862break;863case Op_RegF:864case Op_RegI:865case Op_RegN:866case Op_RegFlags:867case 0: // not an ideal register868lrg.set_num_regs(1);869#ifdef SPARC870lrg.set_reg_pressure(2);871#else872lrg.set_reg_pressure(1);873#endif874break;875case Op_VecS:876assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");877assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");878lrg.set_num_regs(RegMask::SlotsPerVecS);879lrg.set_reg_pressure(1);880break;881case Op_VecD:882assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");883assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");884assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");885lrg.set_num_regs(RegMask::SlotsPerVecD);886lrg.set_reg_pressure(1);887break;888case Op_VecX:889assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");890assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");891assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");892lrg.set_num_regs(RegMask::SlotsPerVecX);893lrg.set_reg_pressure(1);894break;895case Op_VecY:896assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");897assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");898assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");899lrg.set_num_regs(RegMask::SlotsPerVecY);900lrg.set_reg_pressure(1);901break;902default:903ShouldNotReachHere();904}905}906907// Now do the same for inputs908uint cnt = n->req();909// Setup for CISC SPILLING910uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;911if( UseCISCSpill && after_aggressive ) {912inp = n->cisc_operand();913if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )914// Convert operand number to edge index number915inp = n->as_Mach()->operand_index(inp);916}917// Prepare register mask for each input918for( uint k = input_edge_start; k < cnt; k++ ) {919uint vreg = _lrg_map.live_range_id(n->in(k));920if (!vreg) {921continue;922}923924// If this instruction is CISC Spillable, add the flags925// bit to its appropriate input926if( UseCISCSpill && after_aggressive && inp == k ) {927#ifndef PRODUCT928if( TraceCISCSpill ) {929tty->print(" use_cisc_RegMask: ");930n->dump();931}932#endif933n->as_Mach()->use_cisc_RegMask();934}935936LRG &lrg = lrgs(vreg);937// // Testing for floating point code shape938// Node *test = n->in(k);939// if( test->is_Mach() ) {940// MachNode *m = test->as_Mach();941// int op = m->ideal_Opcode();942// if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {943// int zzz = 1;944// }945// }946947// Limit result register mask to acceptable registers.948// Do not limit registers from uncommon uses before949// AggressiveCoalesce. This effectively pre-virtual-splits950// around uncommon uses of common defs.951const RegMask &rm = n->in_RegMask(k);952if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {953// Since we are BEFORE aggressive coalesce, leave the register954// mask untrimmed by the call. This encourages more coalescing.955// Later, AFTER aggressive, this live range will have to spill956// but the spiller handles slow-path calls very nicely.957} else {958lrg.AND( rm );959}960961// Check for bound register masks962const RegMask &lrgmask = lrg.mask();963uint kreg = n->in(k)->ideal_reg();964bool is_vect = RegMask::is_vector(kreg);965assert(n->in(k)->bottom_type()->isa_vect() == NULL ||966is_vect || kreg == Op_RegD || kreg == Op_RegL,967"vector must be in vector registers");968if (lrgmask.is_bound(kreg))969lrg._is_bound = 1;970971// If this use of a double forces a mis-aligned double,972// flag as '_fat_proj' - really flag as allowing misalignment973// AND changes how we count interferences. A mis-aligned974// double can interfere with TWO aligned pairs, or effectively975// FOUR registers!976#ifdef ASSERT977if (is_vect) {978assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");979assert(!lrg._fat_proj, "sanity");980assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");981}982#endif983if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {984lrg._fat_proj = 1;985lrg._is_bound = 1;986}987// if the LRG is an unaligned pair, we will have to spill988// so clear the LRG's register mask if it is not already spilled989if (!is_vect && !n->is_SpillCopy() &&990(lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&991lrgmask.is_misaligned_pair()) {992lrg.Clear();993}994995// Check for maximum frequency value996if (lrg._maxfreq < block->_freq) {997lrg._maxfreq = block->_freq;998}9991000} // End for all allocated inputs1001} // end for all instructions1002} // end for all blocks10031004// Final per-liverange setup1005for (uint i2 = 0; i2 < _lrg_map.max_lrg_id(); i2++) {1006LRG &lrg = lrgs(i2);1007assert(!lrg._is_vector || !lrg._fat_proj, "sanity");1008if (lrg.num_regs() > 1 && !lrg._fat_proj) {1009lrg.clear_to_sets();1010}1011lrg.compute_set_mask_size();1012if (lrg.not_free()) { // Handle case where we lose from the start1013lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));1014lrg._direct_conflict = 1;1015}1016lrg.set_degree(0); // no neighbors in IFG yet1017}1018}10191020// Set the was-lo-degree bit. Conservative coalescing should not change the1021// colorability of the graph. If any live range was of low-degree before1022// coalescing, it should Simplify. This call sets the was-lo-degree bit.1023// The bit is checked in Simplify.1024void PhaseChaitin::set_was_low() {1025#ifdef ASSERT1026for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {1027int size = lrgs(i).num_regs();1028uint old_was_lo = lrgs(i)._was_lo;1029lrgs(i)._was_lo = 0;1030if( lrgs(i).lo_degree() ) {1031lrgs(i)._was_lo = 1; // Trivially of low degree1032} else { // Else check the Brigg's assertion1033// Brigg's observation is that the lo-degree neighbors of a1034// hi-degree live range will not interfere with the color choices1035// of said hi-degree live range. The Simplify reverse-stack-coloring1036// order takes care of the details. Hence you do not have to count1037// low-degree neighbors when determining if this guy colors.1038int briggs_degree = 0;1039IndexSet *s = _ifg->neighbors(i);1040IndexSetIterator elements(s);1041uint lidx;1042while((lidx = elements.next()) != 0) {1043if( !lrgs(lidx).lo_degree() )1044briggs_degree += MAX2(size,lrgs(lidx).num_regs());1045}1046if( briggs_degree < lrgs(i).degrees_of_freedom() )1047lrgs(i)._was_lo = 1; // Low degree via the briggs assertion1048}1049assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");1050}1051#endif1052}10531054#define REGISTER_CONSTRAINED 1610551056// Compute cost/area ratio, in case we spill. Build the lo-degree list.1057void PhaseChaitin::cache_lrg_info( ) {10581059for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {1060LRG &lrg = lrgs(i);10611062// Check for being of low degree: means we can be trivially colored.1063// Low degree, dead or must-spill guys just get to simplify right away1064if( lrg.lo_degree() ||1065!lrg.alive() ||1066lrg._must_spill ) {1067// Split low degree list into those guys that must get a1068// register and those that can go to register or stack.1069// The idea is LRGs that can go register or stack color first when1070// they have a good chance of getting a register. The register-only1071// lo-degree live ranges always get a register.1072OptoReg::Name hi_reg = lrg.mask().find_last_elem();1073if( OptoReg::is_stack(hi_reg)) { // Can go to stack?1074lrg._next = _lo_stk_degree;1075_lo_stk_degree = i;1076} else {1077lrg._next = _lo_degree;1078_lo_degree = i;1079}1080} else { // Else high degree1081lrgs(_hi_degree)._prev = i;1082lrg._next = _hi_degree;1083lrg._prev = 0;1084_hi_degree = i;1085}1086}1087}10881089// Simplify the IFG by removing LRGs of low degree that have NO copies1090void PhaseChaitin::Pre_Simplify( ) {10911092// Warm up the lo-degree no-copy list1093int lo_no_copy = 0;1094for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {1095if ((lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||1096!lrgs(i).alive() ||1097lrgs(i)._must_spill) {1098lrgs(i)._next = lo_no_copy;1099lo_no_copy = i;1100}1101}11021103while( lo_no_copy ) {1104uint lo = lo_no_copy;1105lo_no_copy = lrgs(lo)._next;1106int size = lrgs(lo).num_regs();11071108// Put the simplified guy on the simplified list.1109lrgs(lo)._next = _simplified;1110_simplified = lo;11111112// Yank this guy from the IFG.1113IndexSet *adj = _ifg->remove_node( lo );11141115// If any neighbors' degrees fall below their number of1116// allowed registers, then put that neighbor on the low degree1117// list. Note that 'degree' can only fall and 'numregs' is1118// unchanged by this action. Thus the two are equal at most once,1119// so LRGs hit the lo-degree worklists at most once.1120IndexSetIterator elements(adj);1121uint neighbor;1122while ((neighbor = elements.next()) != 0) {1123LRG *n = &lrgs(neighbor);1124assert( _ifg->effective_degree(neighbor) == n->degree(), "" );11251126// Check for just becoming of-low-degree1127if( n->just_lo_degree() && !n->_has_copy ) {1128assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");1129// Put on lo-degree list1130n->_next = lo_no_copy;1131lo_no_copy = neighbor;1132}1133}1134} // End of while lo-degree no_copy worklist not empty11351136// No more lo-degree no-copy live ranges to simplify1137}11381139// Simplify the IFG by removing LRGs of low degree.1140void PhaseChaitin::Simplify( ) {11411142while( 1 ) { // Repeat till simplified it all1143// May want to explore simplifying lo_degree before _lo_stk_degree.1144// This might result in more spills coloring into registers during1145// Select().1146while( _lo_degree || _lo_stk_degree ) {1147// If possible, pull from lo_stk first1148uint lo;1149if( _lo_degree ) {1150lo = _lo_degree;1151_lo_degree = lrgs(lo)._next;1152} else {1153lo = _lo_stk_degree;1154_lo_stk_degree = lrgs(lo)._next;1155}11561157// Put the simplified guy on the simplified list.1158lrgs(lo)._next = _simplified;1159_simplified = lo;1160// If this guy is "at risk" then mark his current neighbors1161if( lrgs(lo)._at_risk ) {1162IndexSetIterator elements(_ifg->neighbors(lo));1163uint datum;1164while ((datum = elements.next()) != 0) {1165lrgs(datum)._risk_bias = lo;1166}1167}11681169// Yank this guy from the IFG.1170IndexSet *adj = _ifg->remove_node( lo );11711172// If any neighbors' degrees fall below their number of1173// allowed registers, then put that neighbor on the low degree1174// list. Note that 'degree' can only fall and 'numregs' is1175// unchanged by this action. Thus the two are equal at most once,1176// so LRGs hit the lo-degree worklist at most once.1177IndexSetIterator elements(adj);1178uint neighbor;1179while ((neighbor = elements.next()) != 0) {1180LRG *n = &lrgs(neighbor);1181#ifdef ASSERT1182if( VerifyOpto || VerifyRegisterAllocator ) {1183assert( _ifg->effective_degree(neighbor) == n->degree(), "" );1184}1185#endif11861187// Check for just becoming of-low-degree just counting registers.1188// _must_spill live ranges are already on the low degree list.1189if( n->just_lo_degree() && !n->_must_spill ) {1190assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");1191// Pull from hi-degree list1192uint prev = n->_prev;1193uint next = n->_next;1194if( prev ) lrgs(prev)._next = next;1195else _hi_degree = next;1196lrgs(next)._prev = prev;1197n->_next = _lo_degree;1198_lo_degree = neighbor;1199}1200}1201} // End of while lo-degree/lo_stk_degree worklist not empty12021203// Check for got everything: is hi-degree list empty?1204if( !_hi_degree ) break;12051206// Time to pick a potential spill guy1207uint lo_score = _hi_degree;1208double score = lrgs(lo_score).score();1209double area = lrgs(lo_score)._area;1210double cost = lrgs(lo_score)._cost;1211bool bound = lrgs(lo_score)._is_bound;12121213// Find cheapest guy1214debug_only( int lo_no_simplify=0; );1215for( uint i = _hi_degree; i; i = lrgs(i)._next ) {1216assert( !(*_ifg->_yanked)[i], "" );1217// It's just vaguely possible to move hi-degree to lo-degree without1218// going through a just-lo-degree stage: If you remove a double from1219// a float live range it's degree will drop by 2 and you can skip the1220// just-lo-degree stage. It's very rare (shows up after 5000+ methods1221// in -Xcomp of Java2Demo). So just choose this guy to simplify next.1222if( lrgs(i).lo_degree() ) {1223lo_score = i;1224break;1225}1226debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );1227double iscore = lrgs(i).score();1228double iarea = lrgs(i)._area;1229double icost = lrgs(i)._cost;1230bool ibound = lrgs(i)._is_bound;12311232// Compare cost/area of i vs cost/area of lo_score. Smaller cost/area1233// wins. Ties happen because all live ranges in question have spilled1234// a few times before and the spill-score adds a huge number which1235// washes out the low order bits. We are choosing the lesser of 21236// evils; in this case pick largest area to spill.1237// Ties also happen when live ranges are defined and used only inside1238// one block. In which case their area is 0 and score set to max.1239// In such case choose bound live range over unbound to free registers1240// or with smaller cost to spill.1241if( iscore < score ||1242(iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ||1243(iscore == score && iarea == area &&1244( (ibound && !bound) || ibound == bound && (icost < cost) )) ) {1245lo_score = i;1246score = iscore;1247area = iarea;1248cost = icost;1249bound = ibound;1250}1251}1252LRG *lo_lrg = &lrgs(lo_score);1253// The live range we choose for spilling is either hi-degree, or very1254// rarely it can be low-degree. If we choose a hi-degree live range1255// there better not be any lo-degree choices.1256assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );12571258// Pull from hi-degree list1259uint prev = lo_lrg->_prev;1260uint next = lo_lrg->_next;1261if( prev ) lrgs(prev)._next = next;1262else _hi_degree = next;1263lrgs(next)._prev = prev;1264// Jam him on the lo-degree list, despite his high degree.1265// Maybe he'll get a color, and maybe he'll spill.1266// Only Select() will know.1267lrgs(lo_score)._at_risk = true;1268_lo_degree = lo_score;1269lo_lrg->_next = 0;12701271} // End of while not simplified everything12721273}12741275// Is 'reg' register legal for 'lrg'?1276static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {1277if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&1278lrg.mask().Member(OptoReg::add(reg,-chunk))) {1279// RA uses OptoReg which represent the highest element of a registers set.1280// For example, vectorX (128bit) on x86 uses [XMM,XMMb,XMMc,XMMd] set1281// in which XMMd is used by RA to represent such vectors. A double value1282// uses [XMM,XMMb] pairs and XMMb is used by RA for it.1283// The register mask uses largest bits set of overlapping register sets.1284// On x86 with AVX it uses 8 bits for each XMM registers set.1285//1286// The 'lrg' already has cleared-to-set register mask (done in Select()1287// before calling choose_color()). Passing mask.Member(reg) check above1288// indicates that the size (num_regs) of 'reg' set is less or equal to1289// 'lrg' set size.1290// For set size 1 any register which is member of 'lrg' mask is legal.1291if (lrg.num_regs()==1)1292return true;1293// For larger sets only an aligned register with the same set size is legal.1294int mask = lrg.num_regs()-1;1295if ((reg&mask) == mask)1296return true;1297}1298return false;1299}13001301// Choose a color using the biasing heuristic1302OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {13031304// Check for "at_risk" LRG's1305uint risk_lrg = _lrg_map.find(lrg._risk_bias);1306if( risk_lrg != 0 ) {1307// Walk the colored neighbors of the "at_risk" candidate1308// Choose a color which is both legal and already taken by a neighbor1309// of the "at_risk" candidate in order to improve the chances of the1310// "at_risk" candidate of coloring1311IndexSetIterator elements(_ifg->neighbors(risk_lrg));1312uint datum;1313while ((datum = elements.next()) != 0) {1314OptoReg::Name reg = lrgs(datum).reg();1315// If this LRG's register is legal for us, choose it1316if (is_legal_reg(lrg, reg, chunk))1317return reg;1318}1319}13201321uint copy_lrg = _lrg_map.find(lrg._copy_bias);1322if( copy_lrg != 0 ) {1323// If he has a color,1324if( !(*(_ifg->_yanked))[copy_lrg] ) {1325OptoReg::Name reg = lrgs(copy_lrg).reg();1326// And it is legal for you,1327if (is_legal_reg(lrg, reg, chunk))1328return reg;1329} else if( chunk == 0 ) {1330// Choose a color which is legal for him1331RegMask tempmask = lrg.mask();1332tempmask.AND(lrgs(copy_lrg).mask());1333tempmask.clear_to_sets(lrg.num_regs());1334OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());1335if (OptoReg::is_valid(reg))1336return reg;1337}1338}13391340// If no bias info exists, just go with the register selection ordering1341if (lrg._is_vector || lrg.num_regs() == 2) {1342// Find an aligned set1343return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);1344}13451346// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate1347// copy removal to remove many more copies, by preventing a just-assigned1348// register from being repeatedly assigned.1349OptoReg::Name reg = lrg.mask().find_first_elem();1350if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {1351// This 'Remove; find; Insert' idiom is an expensive way to find the1352// SECOND element in the mask.1353lrg.Remove(reg);1354OptoReg::Name reg2 = lrg.mask().find_first_elem();1355lrg.Insert(reg);1356if( OptoReg::is_reg(reg2))1357reg = reg2;1358}1359return OptoReg::add( reg, chunk );1360}13611362// Choose a color in the current chunk1363OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {1364assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");1365assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");13661367if( lrg.num_regs() == 1 || // Common Case1368!lrg._fat_proj ) // Aligned+adjacent pairs ok1369// Use a heuristic to "bias" the color choice1370return bias_color(lrg, chunk);13711372assert(!lrg._is_vector, "should be not vector here" );1373assert( lrg.num_regs() >= 2, "dead live ranges do not color" );13741375// Fat-proj case or misaligned double argument.1376assert(lrg.compute_mask_size() == lrg.num_regs() ||1377lrg.num_regs() == 2,"fat projs exactly color" );1378assert( !chunk, "always color in 1st chunk" );1379// Return the highest element in the set.1380return lrg.mask().find_last_elem();1381}13821383// Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted1384// in reverse order of removal. As long as nothing of hi-degree was yanked,1385// everything going back is guaranteed a color. Select that color. If some1386// hi-degree LRG cannot get a color then we record that we must spill.1387uint PhaseChaitin::Select( ) {1388uint spill_reg = LRG::SPILL_REG;1389_max_reg = OptoReg::Name(0); // Past max register used1390while( _simplified ) {1391// Pull next LRG from the simplified list - in reverse order of removal1392uint lidx = _simplified;1393LRG *lrg = &lrgs(lidx);1394_simplified = lrg->_next;139513961397#ifndef PRODUCT1398if (trace_spilling()) {1399ttyLocker ttyl;1400tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),1401lrg->degrees_of_freedom());1402lrg->dump();1403}1404#endif14051406// Re-insert into the IFG1407_ifg->re_insert(lidx);1408if( !lrg->alive() ) continue;1409// capture allstackedness flag before mask is hacked1410const int is_allstack = lrg->mask().is_AllStack();14111412// Yeah, yeah, yeah, I know, I know. I can refactor this1413// to avoid the GOTO, although the refactored code will not1414// be much clearer. We arrive here IFF we have a stack-based1415// live range that cannot color in the current chunk, and it1416// has to move into the next free stack chunk.1417int chunk = 0; // Current chunk is first chunk1418retry_next_chunk:14191420// Remove neighbor colors1421IndexSet *s = _ifg->neighbors(lidx);14221423debug_only(RegMask orig_mask = lrg->mask();)1424IndexSetIterator elements(s);1425uint neighbor;1426while ((neighbor = elements.next()) != 0) {1427// Note that neighbor might be a spill_reg. In this case, exclusion1428// of its color will be a no-op, since the spill_reg chunk is in outer1429// space. Also, if neighbor is in a different chunk, this exclusion1430// will be a no-op. (Later on, if lrg runs out of possible colors in1431// its chunk, a new chunk of color may be tried, in which case1432// examination of neighbors is started again, at retry_next_chunk.)1433LRG &nlrg = lrgs(neighbor);1434OptoReg::Name nreg = nlrg.reg();1435// Only subtract masks in the same chunk1436if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {1437#ifndef PRODUCT1438uint size = lrg->mask().Size();1439RegMask rm = lrg->mask();1440#endif1441lrg->SUBTRACT(nlrg.mask());1442#ifndef PRODUCT1443if (trace_spilling() && lrg->mask().Size() != size) {1444ttyLocker ttyl;1445tty->print("L%d ", lidx);1446rm.dump();1447tty->print(" intersected L%d ", neighbor);1448nlrg.mask().dump();1449tty->print(" removed ");1450rm.SUBTRACT(lrg->mask());1451rm.dump();1452tty->print(" leaving ");1453lrg->mask().dump();1454tty->cr();1455}1456#endif1457}1458}1459//assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");1460// Aligned pairs need aligned masks1461assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");1462if (lrg->num_regs() > 1 && !lrg->_fat_proj) {1463lrg->clear_to_sets();1464}14651466// Check if a color is available and if so pick the color1467OptoReg::Name reg = choose_color( *lrg, chunk );1468#ifdef SPARC1469debug_only(lrg->compute_set_mask_size());1470assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");1471#endif14721473//---------------1474// If we fail to color and the AllStack flag is set, trigger1475// a chunk-rollover event1476if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {1477// Bump register mask up to next stack chunk1478chunk += RegMask::CHUNK_SIZE;1479lrg->Set_All();14801481goto retry_next_chunk;1482}14831484//---------------1485// Did we get a color?1486else if( OptoReg::is_valid(reg)) {1487#ifndef PRODUCT1488RegMask avail_rm = lrg->mask();1489#endif14901491// Record selected register1492lrg->set_reg(reg);14931494if( reg >= _max_reg ) // Compute max register limit1495_max_reg = OptoReg::add(reg,1);1496// Fold reg back into normal space1497reg = OptoReg::add(reg,-chunk);14981499// If the live range is not bound, then we actually had some choices1500// to make. In this case, the mask has more bits in it than the colors1501// chosen. Restrict the mask to just what was picked.1502int n_regs = lrg->num_regs();1503assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");1504if (n_regs == 1 || !lrg->_fat_proj) {1505assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity");1506lrg->Clear(); // Clear the mask1507lrg->Insert(reg); // Set regmask to match selected reg1508// For vectors and pairs, also insert the low bit of the pair1509for (int i = 1; i < n_regs; i++)1510lrg->Insert(OptoReg::add(reg,-i));1511lrg->set_mask_size(n_regs);1512} else { // Else fatproj1513// mask must be equal to fatproj bits, by definition1514}1515#ifndef PRODUCT1516if (trace_spilling()) {1517ttyLocker ttyl;1518tty->print("L%d selected ", lidx);1519lrg->mask().dump();1520tty->print(" from ");1521avail_rm.dump();1522tty->cr();1523}1524#endif1525// Note that reg is the highest-numbered register in the newly-bound mask.1526} // end color available case15271528//---------------1529// Live range is live and no colors available1530else {1531assert( lrg->alive(), "" );1532assert( !lrg->_fat_proj || lrg->is_multidef() ||1533lrg->_def->outcnt() > 0, "fat_proj cannot spill");1534assert( !orig_mask.is_AllStack(), "All Stack does not spill" );15351536// Assign the special spillreg register1537lrg->set_reg(OptoReg::Name(spill_reg++));1538// Do not empty the regmask; leave mask_size lying around1539// for use during Spilling1540#ifndef PRODUCT1541if( trace_spilling() ) {1542ttyLocker ttyl;1543tty->print("L%d spilling with neighbors: ", lidx);1544s->dump();1545debug_only(tty->print(" original mask: "));1546debug_only(orig_mask.dump());1547dump_lrg(lidx);1548}1549#endif1550} // end spill case15511552}15531554return spill_reg-LRG::SPILL_REG; // Return number of spills1555}15561557// Copy 'was_spilled'-edness from the source Node to the dst Node.1558void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {1559if( _spilled_once.test(src->_idx) ) {1560_spilled_once.set(dst->_idx);1561lrgs(_lrg_map.find(dst))._was_spilled1 = 1;1562if( _spilled_twice.test(src->_idx) ) {1563_spilled_twice.set(dst->_idx);1564lrgs(_lrg_map.find(dst))._was_spilled2 = 1;1565}1566}1567}15681569// Set the 'spilled_once' or 'spilled_twice' flag on a node.1570void PhaseChaitin::set_was_spilled( Node *n ) {1571if( _spilled_once.test_set(n->_idx) )1572_spilled_twice.set(n->_idx);1573}15741575// Convert Ideal spill instructions into proper FramePtr + offset Loads and1576// Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are.1577void PhaseChaitin::fixup_spills() {1578// This function does only cisc spill work.1579if( !UseCISCSpill ) return;15801581NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )15821583// Grab the Frame Pointer1584Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);15851586// For all blocks1587for (uint i = 0; i < _cfg.number_of_blocks(); i++) {1588Block* block = _cfg.get_block(i);15891590// For all instructions in block1591uint last_inst = block->end_idx();1592for (uint j = 1; j <= last_inst; j++) {1593Node* n = block->get_node(j);15941595// Dead instruction???1596assert( n->outcnt() != 0 ||// Nothing dead after post alloc1597C->top() == n || // Or the random TOP node1598n->is_Proj(), // Or a fat-proj kill node1599"No dead instructions after post-alloc" );16001601int inp = n->cisc_operand();1602if( inp != AdlcVMDeps::Not_cisc_spillable ) {1603// Convert operand number to edge index number1604MachNode *mach = n->as_Mach();1605inp = mach->operand_index(inp);1606Node *src = n->in(inp); // Value to load or store1607LRG &lrg_cisc = lrgs(_lrg_map.find_const(src));1608OptoReg::Name src_reg = lrg_cisc.reg();1609// Doubles record the HIGH register of an adjacent pair.1610src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());1611if( OptoReg::is_stack(src_reg) ) { // If input is on stack1612// This is a CISC Spill, get stack offset and construct new node1613#ifndef PRODUCT1614if( TraceCISCSpill ) {1615tty->print(" reg-instr: ");1616n->dump();1617}1618#endif1619int stk_offset = reg2offset(src_reg);1620// Bailout if we might exceed node limit when spilling this instruction1621C->check_node_count(0, "out of nodes fixing spills");1622if (C->failing()) return;1623// Transform node1624MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();1625cisc->set_req(inp,fp); // Base register is frame pointer1626if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {1627assert( cisc->oper_input_base() == 2, "Only adding one edge");1628cisc->ins_req(1,src); // Requires a memory edge1629}1630block->map_node(cisc, j); // Insert into basic block1631n->subsume_by(cisc, C); // Correct graph1632//1633++_used_cisc_instructions;1634#ifndef PRODUCT1635if( TraceCISCSpill ) {1636tty->print(" cisc-instr: ");1637cisc->dump();1638}1639#endif1640} else {1641#ifndef PRODUCT1642if( TraceCISCSpill ) {1643tty->print(" using reg-instr: ");1644n->dump();1645}1646#endif1647++_unused_cisc_instructions; // input can be on stack1648}1649}16501651} // End of for all instructions16521653} // End of for all blocks1654}16551656// Helper to stretch above; recursively discover the base Node for a1657// given derived Node. Easy for AddP-related machine nodes, but needs1658// to be recursive for derived Phis.1659Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {1660// See if already computed; if so return it1661if( derived_base_map[derived->_idx] )1662return derived_base_map[derived->_idx];16631664// See if this happens to be a base.1665// NOTE: we use TypePtr instead of TypeOopPtr because we can have1666// pointers derived from NULL! These are always along paths that1667// can't happen at run-time but the optimizer cannot deduce it so1668// we have to handle it gracefully.1669assert(!derived->bottom_type()->isa_narrowoop() ||1670derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");1671const TypePtr *tj = derived->bottom_type()->isa_ptr();1672// If its an OOP with a non-zero offset, then it is derived.1673if( tj == NULL || tj->_offset == 0 ) {1674derived_base_map[derived->_idx] = derived;1675return derived;1676}1677// Derived is NULL+offset? Base is NULL!1678if( derived->is_Con() ) {1679Node *base = _matcher.mach_null();1680assert(base != NULL, "sanity");1681if (base->in(0) == NULL) {1682// Initialize it once and make it shared:1683// set control to _root and place it into Start block1684// (where top() node is placed).1685base->init_req(0, _cfg.get_root_node());1686Block *startb = _cfg.get_block_for_node(C->top());1687uint node_pos = startb->find_node(C->top());1688startb->insert_node(base, node_pos);1689_cfg.map_node_to_block(base, startb);1690assert(_lrg_map.live_range_id(base) == 0, "should not have LRG yet");16911692// The loadConP0 might have projection nodes depending on architecture1693// Add the projection nodes to the CFG1694for (DUIterator_Fast imax, i = base->fast_outs(imax); i < imax; i++) {1695Node* use = base->fast_out(i);1696if (use->is_MachProj()) {1697startb->insert_node(use, ++node_pos);1698_cfg.map_node_to_block(use, startb);1699new_lrg(use, maxlrg++);1700}1701}1702}1703if (_lrg_map.live_range_id(base) == 0) {1704new_lrg(base, maxlrg++);1705}1706assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");1707derived_base_map[derived->_idx] = base;1708return base;1709}17101711// Check for AddP-related opcodes1712if (!derived->is_Phi()) {1713assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));1714Node *base = derived->in(AddPNode::Base);1715derived_base_map[derived->_idx] = base;1716return base;1717}17181719// Recursively find bases for Phis.1720// First check to see if we can avoid a base Phi here.1721Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);1722uint i;1723for( i = 2; i < derived->req(); i++ )1724if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))1725break;1726// Went to the end without finding any different bases?1727if( i == derived->req() ) { // No need for a base Phi here1728derived_base_map[derived->_idx] = base;1729return base;1730}17311732// Now we see we need a base-Phi here to merge the bases1733const Type *t = base->bottom_type();1734base = new (C) PhiNode( derived->in(0), t );1735for( i = 1; i < derived->req(); i++ ) {1736base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));1737t = t->meet(base->in(i)->bottom_type());1738}1739base->as_Phi()->set_type(t);17401741// Search the current block for an existing base-Phi1742Block *b = _cfg.get_block_for_node(derived);1743for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi1744Node *phi = b->get_node(i);1745if( !phi->is_Phi() ) { // Found end of Phis with no match?1746b->insert_node(base, i); // Must insert created Phi here as base1747_cfg.map_node_to_block(base, b);1748new_lrg(base,maxlrg++);1749break;1750}1751// See if Phi matches.1752uint j;1753for( j = 1; j < base->req(); j++ )1754if( phi->in(j) != base->in(j) &&1755!(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs1756break;1757if( j == base->req() ) { // All inputs match?1758base = phi; // Then use existing 'phi' and drop 'base'1759break;1760}1761}176217631764// Cache info for later passes1765derived_base_map[derived->_idx] = base;1766return base;1767}17681769// At each Safepoint, insert extra debug edges for each pair of derived value/1770// base pointer that is live across the Safepoint for oopmap building. The1771// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the1772// required edge set.1773bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {1774int must_recompute_live = false;1775uint maxlrg = _lrg_map.max_lrg_id();1776Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());1777memset( derived_base_map, 0, sizeof(Node*)*C->unique() );17781779// For all blocks in RPO do...1780for (uint i = 0; i < _cfg.number_of_blocks(); i++) {1781Block* block = _cfg.get_block(i);1782// Note use of deep-copy constructor. I cannot hammer the original1783// liveout bits, because they are needed by the following coalesce pass.1784IndexSet liveout(_live->live(block));17851786for (uint j = block->end_idx() + 1; j > 1; j--) {1787Node* n = block->get_node(j - 1);17881789// Pre-split compares of loop-phis. Loop-phis form a cycle we would1790// like to see in the same register. Compare uses the loop-phi and so1791// extends its live range BUT cannot be part of the cycle. If this1792// extended live range overlaps with the update of the loop-phi value1793// we need both alive at the same time -- which requires at least 11794// copy. But because Intel has only 2-address registers we end up with1795// at least 2 copies, one before the loop-phi update instruction and1796// one after. Instead we split the input to the compare just after the1797// phi.1798if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {1799Node *phi = n->in(1);1800if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {1801Block *phi_block = _cfg.get_block_for_node(phi);1802if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {1803const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];1804Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );1805insert_proj( phi_block, 1, spill, maxlrg++ );1806n->set_req(1,spill);1807must_recompute_live = true;1808}1809}1810}18111812// Get value being defined1813uint lidx = _lrg_map.live_range_id(n);1814// Ignore the occasional brand-new live range1815if (lidx && lidx < _lrg_map.max_lrg_id()) {1816// Remove from live-out set1817liveout.remove(lidx);18181819// Copies do not define a new value and so do not interfere.1820// Remove the copies source from the liveout set before interfering.1821uint idx = n->is_Copy();1822if (idx) {1823liveout.remove(_lrg_map.live_range_id(n->in(idx)));1824}1825}18261827// Found a safepoint?1828JVMState *jvms = n->jvms();1829if( jvms ) {1830// Now scan for a live derived pointer1831IndexSetIterator elements(&liveout);1832uint neighbor;1833while ((neighbor = elements.next()) != 0) {1834// Find reaching DEF for base and derived values1835// This works because we are still in SSA during this call.1836Node *derived = lrgs(neighbor)._def;1837const TypePtr *tj = derived->bottom_type()->isa_ptr();1838assert(!derived->bottom_type()->isa_narrowoop() ||1839derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");1840// If its an OOP with a non-zero offset, then it is derived.1841if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {1842Node *base = find_base_for_derived(derived_base_map, derived, maxlrg);1843assert(base->_idx < _lrg_map.size(), "");1844// Add reaching DEFs of derived pointer and base pointer as a1845// pair of inputs1846n->add_req(derived);1847n->add_req(base);18481849// See if the base pointer is already live to this point.1850// Since I'm working on the SSA form, live-ness amounts to1851// reaching def's. So if I find the base's live range then1852// I know the base's def reaches here.1853if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or1854!liveout.member(_lrg_map.live_range_id(base))) && // not live) AND1855(_lrg_map.live_range_id(base) > 0) && // not a constant1856_cfg.get_block_for_node(base) != block) { // base not def'd in blk)1857// Base pointer is not currently live. Since I stretched1858// the base pointer to here and it crosses basic-block1859// boundaries, the global live info is now incorrect.1860// Recompute live.1861must_recompute_live = true;1862} // End of if base pointer is not live to debug info1863}1864} // End of scan all live data for derived ptrs crossing GC point1865} // End of if found a GC point18661867// Make all inputs live1868if (!n->is_Phi()) { // Phi function uses come from prior block1869for (uint k = 1; k < n->req(); k++) {1870uint lidx = _lrg_map.live_range_id(n->in(k));1871if (lidx < _lrg_map.max_lrg_id()) {1872liveout.insert(lidx);1873}1874}1875}18761877} // End of forall instructions in block1878liveout.clear(); // Free the memory used by liveout.18791880} // End of forall blocks1881_lrg_map.set_max_lrg_id(maxlrg);18821883// If I created a new live range I need to recompute live1884if (maxlrg != _ifg->_maxlrg) {1885must_recompute_live = true;1886}18871888return must_recompute_live != 0;1889}18901891// Extend the node to LRG mapping18921893void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {1894_lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));1895}18961897#ifndef PRODUCT1898void PhaseChaitin::dump(const Node *n) const {1899uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;1900tty->print("L%d",r);1901if (r && n->Opcode() != Op_Phi) {1902if( _node_regs ) { // Got a post-allocation copy of allocation?1903tty->print("[");1904OptoReg::Name second = get_reg_second(n);1905if( OptoReg::is_valid(second) ) {1906if( OptoReg::is_reg(second) )1907tty->print("%s:",Matcher::regName[second]);1908else1909tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));1910}1911OptoReg::Name first = get_reg_first(n);1912if( OptoReg::is_reg(first) )1913tty->print("%s]",Matcher::regName[first]);1914else1915tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));1916} else1917n->out_RegMask().dump();1918}1919tty->print("/N%d\t",n->_idx);1920tty->print("%s === ", n->Name());1921uint k;1922for (k = 0; k < n->req(); k++) {1923Node *m = n->in(k);1924if (!m) {1925tty->print("_ ");1926}1927else {1928uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;1929tty->print("L%d",r);1930// Data MultiNode's can have projections with no real registers.1931// Don't die while dumping them.1932int op = n->Opcode();1933if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {1934if( _node_regs ) {1935tty->print("[");1936OptoReg::Name second = get_reg_second(n->in(k));1937if( OptoReg::is_valid(second) ) {1938if( OptoReg::is_reg(second) )1939tty->print("%s:",Matcher::regName[second]);1940else1941tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),1942reg2offset_unchecked(second));1943}1944OptoReg::Name first = get_reg_first(n->in(k));1945if( OptoReg::is_reg(first) )1946tty->print("%s]",Matcher::regName[first]);1947else1948tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),1949reg2offset_unchecked(first));1950} else1951n->in_RegMask(k).dump();1952}1953tty->print("/N%d ",m->_idx);1954}1955}1956if( k < n->len() && n->in(k) ) tty->print("| ");1957for( ; k < n->len(); k++ ) {1958Node *m = n->in(k);1959if(!m) {1960break;1961}1962uint r = (m->_idx < _lrg_map.size()) ? _lrg_map.find_const(m) : 0;1963tty->print("L%d",r);1964tty->print("/N%d ",m->_idx);1965}1966if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);1967else n->dump_spec(tty);1968if( _spilled_once.test(n->_idx ) ) {1969tty->print(" Spill_1");1970if( _spilled_twice.test(n->_idx ) )1971tty->print(" Spill_2");1972}1973tty->print("\n");1974}19751976void PhaseChaitin::dump(const Block *b) const {1977b->dump_head(&_cfg);19781979// For all instructions1980for( uint j = 0; j < b->number_of_nodes(); j++ )1981dump(b->get_node(j));1982// Print live-out info at end of block1983if( _live ) {1984tty->print("Liveout: ");1985IndexSet *live = _live->live(b);1986IndexSetIterator elements(live);1987tty->print("{");1988uint i;1989while ((i = elements.next()) != 0) {1990tty->print("L%d ", _lrg_map.find_const(i));1991}1992tty->print_cr("}");1993}1994tty->print("\n");1995}19961997void PhaseChaitin::dump() const {1998tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n",1999_matcher._new_SP, _framesize );20002001// For all blocks2002for (uint i = 0; i < _cfg.number_of_blocks(); i++) {2003dump(_cfg.get_block(i));2004}2005// End of per-block dump2006tty->print("\n");20072008if (!_ifg) {2009tty->print("(No IFG.)\n");2010return;2011}20122013// Dump LRG array2014tty->print("--- Live RanGe Array ---\n");2015for (uint i2 = 1; i2 < _lrg_map.max_lrg_id(); i2++) {2016tty->print("L%d: ",i2);2017if (i2 < _ifg->_maxlrg) {2018lrgs(i2).dump();2019}2020else {2021tty->print_cr("new LRG");2022}2023}2024tty->cr();20252026// Dump lo-degree list2027tty->print("Lo degree: ");2028for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )2029tty->print("L%d ",i3);2030tty->cr();20312032// Dump lo-stk-degree list2033tty->print("Lo stk degree: ");2034for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )2035tty->print("L%d ",i4);2036tty->cr();20372038// Dump lo-degree list2039tty->print("Hi degree: ");2040for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )2041tty->print("L%d ",i5);2042tty->cr();2043}20442045void PhaseChaitin::dump_degree_lists() const {2046// Dump lo-degree list2047tty->print("Lo degree: ");2048for( uint i = _lo_degree; i; i = lrgs(i)._next )2049tty->print("L%d ",i);2050tty->cr();20512052// Dump lo-stk-degree list2053tty->print("Lo stk degree: ");2054for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )2055tty->print("L%d ",i2);2056tty->cr();20572058// Dump lo-degree list2059tty->print("Hi degree: ");2060for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )2061tty->print("L%d ",i3);2062tty->cr();2063}20642065void PhaseChaitin::dump_simplified() const {2066tty->print("Simplified: ");2067for( uint i = _simplified; i; i = lrgs(i)._next )2068tty->print("L%d ",i);2069tty->cr();2070}20712072static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {2073if ((int)reg < 0)2074sprintf(buf, "<OptoReg::%d>", (int)reg);2075else if (OptoReg::is_reg(reg))2076strcpy(buf, Matcher::regName[reg]);2077else2078sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer),2079pc->reg2offset(reg));2080return buf+strlen(buf);2081}20822083// Dump a register name into a buffer. Be intelligent if we get called2084// before allocation is complete.2085char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {2086if( this == NULL ) { // Not got anything?2087sprintf(buf,"N%d",n->_idx); // Then use Node index2088} else if( _node_regs ) {2089// Post allocation, use direct mappings, no LRG info available2090print_reg( get_reg_first(n), this, buf );2091} else {2092uint lidx = _lrg_map.find_const(n); // Grab LRG number2093if( !_ifg ) {2094sprintf(buf,"L%d",lidx); // No register binding yet2095} else if( !lidx ) { // Special, not allocated value2096strcpy(buf,"Special");2097} else {2098if (lrgs(lidx)._is_vector) {2099if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))2100print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register2101else2102sprintf(buf,"L%d",lidx); // No register binding yet2103} else if( (lrgs(lidx).num_regs() == 1)2104? lrgs(lidx).mask().is_bound1()2105: lrgs(lidx).mask().is_bound_pair() ) {2106// Hah! We have a bound machine register2107print_reg( lrgs(lidx).reg(), this, buf );2108} else {2109sprintf(buf,"L%d",lidx); // No register binding yet2110}2111}2112}2113return buf+strlen(buf);2114}21152116void PhaseChaitin::dump_for_spill_split_recycle() const {2117if( WizardMode && (PrintCompilation || PrintOpto) ) {2118// Display which live ranges need to be split and the allocator's state2119tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);2120for (uint bidx = 1; bidx < _lrg_map.max_lrg_id(); bidx++) {2121if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {2122tty->print("L%d: ", bidx);2123lrgs(bidx).dump();2124}2125}2126tty->cr();2127dump();2128}2129}21302131void PhaseChaitin::dump_frame() const {2132const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);2133const TypeTuple *domain = C->tf()->domain();2134const int argcnt = domain->cnt() - TypeFunc::Parms;21352136// Incoming arguments in registers dump2137for( int k = 0; k < argcnt; k++ ) {2138OptoReg::Name parmreg = _matcher._parm_regs[k].first();2139if( OptoReg::is_reg(parmreg)) {2140const char *reg_name = OptoReg::regname(parmreg);2141tty->print("#r%3.3d %s", parmreg, reg_name);2142parmreg = _matcher._parm_regs[k].second();2143if( OptoReg::is_reg(parmreg)) {2144tty->print(":%s", OptoReg::regname(parmreg));2145}2146tty->print(" : parm %d: ", k);2147domain->field_at(k + TypeFunc::Parms)->dump();2148tty->cr();2149}2150}21512152// Check for un-owned padding above incoming args2153OptoReg::Name reg = _matcher._new_SP;2154if( reg > _matcher._in_arg_limit ) {2155reg = OptoReg::add(reg, -1);2156tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));2157}21582159// Incoming argument area dump2160OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());2161while( reg > begin_in_arg ) {2162reg = OptoReg::add(reg, -1);2163tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));2164int j;2165for( j = 0; j < argcnt; j++) {2166if( _matcher._parm_regs[j].first() == reg ||2167_matcher._parm_regs[j].second() == reg ) {2168tty->print("parm %d: ",j);2169domain->field_at(j + TypeFunc::Parms)->dump();2170tty->cr();2171break;2172}2173}2174if( j >= argcnt )2175tty->print_cr("HOLE, owned by SELF");2176}21772178// Old outgoing preserve area2179while( reg > _matcher._old_SP ) {2180reg = OptoReg::add(reg, -1);2181tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));2182}21832184// Old SP2185tty->print_cr("# -- Old %s -- Framesize: %d --",fp,2186reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);21872188// Preserve area dump2189int fixed_slots = C->fixed_slots();2190OptoReg::Name begin_in_preserve = OptoReg::add(_matcher._old_SP, -(int)C->in_preserve_stack_slots());2191OptoReg::Name return_addr = _matcher.return_addr();21922193reg = OptoReg::add(reg, -1);2194while (OptoReg::is_stack(reg)) {2195tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));2196if (return_addr == reg) {2197tty->print_cr("return address");2198} else if (reg >= begin_in_preserve) {2199// Preserved slots are present on x862200if (return_addr == OptoReg::add(reg, VMRegImpl::slots_per_word))2201tty->print_cr("saved fp register");2202else if (return_addr == OptoReg::add(reg, 2*VMRegImpl::slots_per_word) &&2203VerifyStackAtCalls)2204tty->print_cr("0xBADB100D +VerifyStackAtCalls");2205else2206tty->print_cr("in_preserve");2207} else if ((int)OptoReg::reg2stack(reg) < fixed_slots) {2208tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));2209} else {2210tty->print_cr("pad2, stack alignment");2211}2212reg = OptoReg::add(reg, -1);2213}22142215// Spill area dump2216reg = OptoReg::add(_matcher._new_SP, _framesize );2217while( reg > _matcher._out_arg_limit ) {2218reg = OptoReg::add(reg, -1);2219tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));2220}22212222// Outgoing argument area dump2223while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {2224reg = OptoReg::add(reg, -1);2225tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));2226}22272228// Outgoing new preserve area2229while( reg > _matcher._new_SP ) {2230reg = OptoReg::add(reg, -1);2231tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));2232}2233tty->print_cr("#");2234}22352236void PhaseChaitin::dump_bb( uint pre_order ) const {2237tty->print_cr("---dump of B%d---",pre_order);2238for (uint i = 0; i < _cfg.number_of_blocks(); i++) {2239Block* block = _cfg.get_block(i);2240if (block->_pre_order == pre_order) {2241dump(block);2242}2243}2244}22452246void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {2247tty->print_cr("---dump of L%d---",lidx);22482249if (_ifg) {2250if (lidx >= _lrg_map.max_lrg_id()) {2251tty->print("Attempt to print live range index beyond max live range.\n");2252return;2253}2254tty->print("L%d: ",lidx);2255if (lidx < _ifg->_maxlrg) {2256lrgs(lidx).dump();2257} else {2258tty->print_cr("new LRG");2259}2260}2261if( _ifg && lidx < _ifg->_maxlrg) {2262tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));2263_ifg->neighbors(lidx)->dump();2264tty->cr();2265}2266// For all blocks2267for (uint i = 0; i < _cfg.number_of_blocks(); i++) {2268Block* block = _cfg.get_block(i);2269int dump_once = 0;22702271// For all instructions2272for( uint j = 0; j < block->number_of_nodes(); j++ ) {2273Node *n = block->get_node(j);2274if (_lrg_map.find_const(n) == lidx) {2275if (!dump_once++) {2276tty->cr();2277block->dump_head(&_cfg);2278}2279dump(n);2280continue;2281}2282if (!defs_only) {2283uint cnt = n->req();2284for( uint k = 1; k < cnt; k++ ) {2285Node *m = n->in(k);2286if (!m) {2287continue; // be robust in the dumper2288}2289if (_lrg_map.find_const(m) == lidx) {2290if (!dump_once++) {2291tty->cr();2292block->dump_head(&_cfg);2293}2294dump(n);2295}2296}2297}2298}2299} // End of per-block dump2300tty->cr();2301}2302#endif // not PRODUCT23032304int PhaseChaitin::_final_loads = 0;2305int PhaseChaitin::_final_stores = 0;2306int PhaseChaitin::_final_memoves= 0;2307int PhaseChaitin::_final_copies = 0;2308double PhaseChaitin::_final_load_cost = 0;2309double PhaseChaitin::_final_store_cost = 0;2310double PhaseChaitin::_final_memove_cost= 0;2311double PhaseChaitin::_final_copy_cost = 0;2312int PhaseChaitin::_conserv_coalesce = 0;2313int PhaseChaitin::_conserv_coalesce_pair = 0;2314int PhaseChaitin::_conserv_coalesce_trie = 0;2315int PhaseChaitin::_conserv_coalesce_quad = 0;2316int PhaseChaitin::_post_alloc = 0;2317int PhaseChaitin::_lost_opp_pp_coalesce = 0;2318int PhaseChaitin::_lost_opp_cflow_coalesce = 0;2319int PhaseChaitin::_used_cisc_instructions = 0;2320int PhaseChaitin::_unused_cisc_instructions = 0;2321int PhaseChaitin::_allocator_attempts = 0;2322int PhaseChaitin::_allocator_successes = 0;23232324#ifndef PRODUCT2325uint PhaseChaitin::_high_pressure = 0;2326uint PhaseChaitin::_low_pressure = 0;23272328void PhaseChaitin::print_chaitin_statistics() {2329tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies);2330tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);2331tty->print_cr("Adjusted spill cost = %7.0f.",2332_final_load_cost*4.0 + _final_store_cost * 2.0 +2333_final_copy_cost*1.0 + _final_memove_cost*12.0);2334tty->print("Conservatively coalesced %d copies, %d pairs",2335_conserv_coalesce, _conserv_coalesce_pair);2336if( _conserv_coalesce_trie || _conserv_coalesce_quad )2337tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);2338tty->print_cr(", %d post alloc.", _post_alloc);2339if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce )2340tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",2341_lost_opp_pp_coalesce, _lost_opp_cflow_coalesce );2342if( _used_cisc_instructions || _unused_cisc_instructions )2343tty->print_cr("Used cisc instruction %d, remained in register %d",2344_used_cisc_instructions, _unused_cisc_instructions);2345if( _allocator_successes != 0 )2346tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);2347tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);2348}2349#endif // not PRODUCT235023512352