// File: hotspot/src/share/vm/opto/compile.cpp (jdk8u272-b10, aarch32 port)
/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "ci/ciReplay.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/exceptionHandlerTable.hpp"
#include "code/nmethod.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "jfr/jfrEvents.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/escape.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/matcher.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/node.hpp"
#include "opto/opcodes.hpp"
#include "opto/output.hpp"
#include "opto/parse.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timer.hpp"
#include "utilities/copy.hpp"
// Pull in the architecture-specific AD file for the configured target.
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch32
# include "adfiles/ad_aarch32.hpp"
#endif

// -------------------- Compile::mach_constant_base_node -----------------------
// Constant table base node singleton.
// Lazily created on first request and anchored on the root node so it is
// never removed as dead by graph cleanup.
MachConstantBaseNode* Compile::mach_constant_base_node() {
  if (_mach_constant_base_node == NULL) {
    _mach_constant_base_node = new (C) MachConstantBaseNode();
    _mach_constant_base_node->add_req(C->root());
  }
  return _mach_constant_base_node;
}


/// Support for intrinsics.

// Return the index at which m must be inserted (or already exists).
// The sort order is by the address of the ciMethod, with is_virtual as minor key.
// NOTE: ordering by raw ciMethod* address is stable for one compilation
// because ciMethod objects are arena-allocated and never move.
int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
#ifdef ASSERT
  // Verify the list invariant before searching: strictly increasing by
  // (method, is_virtual) pair.
  for (int i = 1; i < _intrinsics->length(); i++) {
    CallGenerator* cg1 = _intrinsics->at(i-1);
    CallGenerator* cg2 = _intrinsics->at(i);
    assert(cg1->method() != cg2->method()
           ? cg1->method()     < cg2->method()
           : cg1->is_virtual() < cg2->is_virtual(),
           "compiler intrinsics list must stay sorted");
  }
#endif
  // Binary search sorted list, in decreasing intervals [lo, hi].
  int lo = 0, hi = _intrinsics->length()-1;
  while (lo <= hi) {
    int mid = (uint)(hi + lo) / 2;
    ciMethod* mid_m = _intrinsics->at(mid)->method();
    if (m < mid_m) {
      hi = mid-1;
    } else if (m > mid_m) {
      lo = mid+1;
    } else {
      // look at minor sort key
      bool mid_virt = _intrinsics->at(mid)->is_virtual();
      if (is_virtual < mid_virt) {
        hi = mid-1;
      } else if (is_virtual > mid_virt) {
        lo = mid+1;
      } else {
        return mid;  // exact match
      }
    }
  }
  return lo;  // inexact match: index where m should be inserted
}
// Insert cg into the sorted intrinsics list, creating the list on first use.
void Compile::register_intrinsic(CallGenerator* cg) {
  if (_intrinsics == NULL) {
    _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
  }
  // This code is stolen from ciObjectFactory::insert.
  // Really, GrowableArray should have methods for
  // insert_at, remove_at, and binary_search.
  int len = _intrinsics->length();
  int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
  if (index == len) {
    _intrinsics->append(cg);
  } else {
#ifdef ASSERT
    CallGenerator* oldcg = _intrinsics->at(index);
    assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
#endif
    // Grow by one, then shift elements [index, len-1] right to open a slot.
    _intrinsics->append(_intrinsics->at(len-1));
    int pos;
    for (pos = len-2; pos >= index; pos--) {
      _intrinsics->at_put(pos+1,_intrinsics->at(pos));
    }
    _intrinsics->at_put(index, cg);
  }
  assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
}

// Look up (and lazily create) the intrinsic CallGenerator for m.
// Returns NULL if m has no intrinsic, or its intrinsic is disabled.
CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
  assert(m->is_loaded(), "don't try this on unloaded methods");
  if (_intrinsics != NULL) {
    int index = intrinsic_insertion_index(m, is_virtual);
    if (index < _intrinsics->length()
        && _intrinsics->at(index)->method() == m
        && _intrinsics->at(index)->is_virtual() == is_virtual) {
      return _intrinsics->at(index);
    }
  }
  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
  if (m->intrinsic_id() != vmIntrinsics::_none &&
      m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
    CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
    if (cg != NULL) {
      // Save it for next time:
      register_intrinsic(cg);
      return cg;
    } else {
      gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
    }
  }
  return NULL;
}

// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
// in library_call.cpp.


#ifndef PRODUCT
// statistics gathering...

// Per-intrinsic-ID histogram of use counts and outcome flags; slot
// vmIntrinsics::_none accumulates the overall totals.
juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};

// Record an intrinsic event (worked/failed/disabled, virtual or not) in the
// histograms above. Returns true if this call changed the recorded state.
bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
  assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
  int oflags = _intrinsic_hist_flags[id];
  assert(flags != 0, "what happened?");
  if (is_virtual) {
    flags |= _intrinsic_virtual;
  }
  bool changed = (flags != oflags);
  if ((flags & _intrinsic_worked) != 0) {
    juint count = (_intrinsic_hist_count[id] += 1);
    if (count == 1) {
      changed = true;           // first time
    }
    // increment the overall count also:
    _intrinsic_hist_count[vmIntrinsics::_none] += 1;
  }
  if (changed) {
    if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
      // Something changed about the intrinsic's virtuality.
      if ((flags & _intrinsic_virtual) != 0) {
        // This is the first use of this intrinsic as a virtual call.
        if (oflags != 0) {
          // We already saw it as a non-virtual, so note both cases.
          flags |= _intrinsic_both;
        }
      } else if ((oflags & _intrinsic_both) == 0) {
        // This is the first use of this intrinsic as a non-virtual
        flags |= _intrinsic_both;
      }
    }
    _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
  }
  // update the overall flags also:
  _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
  return changed;
}
// Render the intrinsic outcome flags as a comma-separated string into buf.
// Returns buf+1 to skip the leading comma. buf must hold at least ~50 chars.
static char* format_flags(int flags, char* buf) {
  buf[0] = 0;
  if ((flags & Compile::_intrinsic_worked) != 0)    strcat(buf, ",worked");
  if ((flags & Compile::_intrinsic_failed) != 0)    strcat(buf, ",failed");
  if ((flags & Compile::_intrinsic_disabled) != 0)  strcat(buf, ",disabled");
  if ((flags & Compile::_intrinsic_virtual) != 0)   strcat(buf, ",virtual");
  if ((flags & Compile::_intrinsic_both) != 0)      strcat(buf, ",nonvirtual");
  if (buf[0] == 0)  strcat(buf, ",");
  assert(buf[0] == ',', "must be");
  return &buf[1];
}

// Dump the per-intrinsic usage histogram to tty (and tag it in the XML log).
void Compile::print_intrinsic_statistics() {
  char flagsbuf[100];
  ttyLocker ttyl;
  if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
  tty->print_cr("Compiler intrinsic usage:");
  juint total = _intrinsic_hist_count[vmIntrinsics::_none];
  if (total == 0)  total = 1;  // avoid div0 in case of no successes
  #define PRINT_STAT_LINE(name, c, f) \
    tty->print_cr("  %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
  for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
    vmIntrinsics::ID id = (vmIntrinsics::ID) index;
    int   flags = _intrinsic_hist_flags[id];
    juint count = _intrinsic_hist_count[id];
    if ((flags | count) != 0) {
      PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
    }
  }
  PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
  if (xtty != NULL)  xtty->tail("statistics");
}

// Print aggregate C2 phase statistics, then the intrinsic histogram if any
// intrinsic was ever recorded.
void Compile::print_statistics() {
  { ttyLocker ttyl;
    if (xtty != NULL)  xtty->head("statistics type='opto'");
    Parse::print_statistics();
    PhaseCCP::print_statistics();
    PhaseRegAlloc::print_statistics();
    Scheduling::print_statistics();
    PhasePeephole::print_statistics();
    PhaseIdealLoop::print_statistics();
    if (xtty != NULL)  xtty->tail("statistics");
  }
  if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
    // put this under its own <statistics> element.
    print_intrinsic_statistics();
  }
}
#endif //PRODUCT

// Support for bundling info
// Return the scheduling bundle for node n; only valid after Output.
Bundle* Compile::node_bundling(const Node *n) {
  assert(valid_bundle_info(n), "oob");
  return &_node_bundling_base[n->_idx];
}

// True if bundle info has been computed for n's index.
bool Compile::valid_bundle_info(const Node *n) {
  return (_node_bundling_limit > n->_idx);
}


// Replace every use of n with nn, maintaining the initial GVN hash table
// (uses are removed from the table before edge rewiring and reinserted
// afterwards) and queueing touched uses for IGVN.
void Compile::gvn_replace_by(Node* n, Node* nn) {
  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
    Node* use = n->last_out(i);
    bool is_in_table = initial_gvn()->hash_delete(use);
    uint uses_found = 0;
    for (uint j = 0; j < use->len(); j++) {
      if (use->in(j) == n) {
        if (j < use->req())
          use->set_req(j, nn);
        else
          use->set_prec(j, nn);
        uses_found++;
      }
    }
    if (is_in_table) {
      // reinsert into table
      initial_gvn()->hash_find_insert(use);
    }
    record_for_igvn(use);
    i -= uses_found;    // we deleted 1 or more copies of this edge
  }
}


// Conservative test for a non-node pointer: NULL, misaligned, or already
// destructed storage must not be traversed as a Node.
static inline bool not_a_node(const Node* n) {
  if (n == NULL)                   return true;
  if (((intptr_t)n & 1) != 0)      return true;  // uninitialized, etc.
  if (*(address*)n == badAddress)  return true;  // kill by Node::destruct
  return false;
}

// Identify all nodes that are reachable from below, useful.
// Use breadth-first pass that records state in a Unique_Node_List,
// recursive traversal is slower.
void Compile::identify_useful_nodes(Unique_Node_List &useful) {
  int estimated_worklist_size = live_nodes();
  useful.map( estimated_worklist_size, NULL );  // preallocate space

  // Initialize worklist
  if (root() != NULL)     { useful.push(root()); }
  // If 'top' is cached, declare it useful to preserve cached node
  if( cached_top_node() ) { useful.push(cached_top_node()); }

  // Push all useful nodes onto the list, breadthfirst
  for( uint next = 0; next < useful.size(); ++next ) {
    assert( next < unique(), "Unique useful nodes < total nodes");
    Node *n  = useful.at(next);
    uint max = n->len();
    for( uint i = 0; i < max; ++i ) {
      Node *m = n->in(i);
      if (not_a_node(m))  continue;
      useful.push(m);
    }
  }
}
// Update dead_node_list with any missing dead nodes using useful
// list. Consider all non-useful nodes to be useless i.e., dead nodes.
void Compile::update_dead_node_list(Unique_Node_List &useful) {
  uint max_idx = unique();
  VectorSet& useful_node_set = useful.member_set();

  for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
    // If node with index node_idx is not in useful set,
    // mark it as dead in dead node list.
    if (! useful_node_set.test(node_idx) ) {
      record_dead_node(node_idx);
    }
  }
}

// Compact a late-inline list in place, dropping entries whose call node is
// no longer in the useful set. Order of surviving entries is preserved.
void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
  int shift = 0;  // number of dead entries seen so far
  for (int i = 0; i < inlines->length(); i++) {
    CallGenerator* cg = inlines->at(i);
    CallNode* call = cg->call_node();
    if (shift > 0) {
      inlines->at_put(i-shift, cg);
    }
    if (!useful.member(call)) {
      shift++;
    }
  }
  inlines->trunc_to(inlines->length()-shift);
}

// Disconnect all useless nodes by disconnecting those at the boundary.
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
  uint next = 0;
  while (next < useful.size()) {
    Node *n = useful.at(next++);
    if (n->is_SafePoint()) {
      // We're done with a parsing phase. Replaced nodes are not valid
      // beyond that point.
      n->as_SafePoint()->delete_replaced_nodes();
    }
    // Use raw traversal of out edges since this code removes out edges
    int max = n->outcnt();
    for (int j = 0; j < max; ++j) {
      Node* child = n->raw_out(j);
      if (! useful.member(child)) {
        assert(!child->is_top() || child != top(),
               "If top is cached in Compile object it is in useful list");
        // Only need to remove this out-edge to the useless node
        n->raw_del_out(j);
        --j;    // re-examine slot j, which now holds the last out-edge
        --max;
      }
    }
    if (n->outcnt() == 1 && n->has_special_unique_user()) {
      record_for_igvn(n->unique_out());
    }
  }
  // Remove useless macro and predicate opaq nodes
  for (int i = C->macro_count()-1; i >= 0; i--) {
    Node* n = C->macro_node(i);
    if (!useful.member(n)) {
      remove_macro_node(n);
    }
  }
  // Remove useless CastII nodes with range check dependency
  for (int i = range_check_cast_count() - 1; i >= 0; i--) {
    Node* cast = range_check_cast_node(i);
    if (!useful.member(cast)) {
      remove_range_check_cast(cast);
    }
  }
  // Remove useless expensive node
  for (int i = C->expensive_count()-1; i >= 0; i--) {
    Node* n = C->expensive_node(i);
    if (!useful.member(n)) {
      remove_expensive_node(n);
    }
  }
  // clean up the late inline lists
  remove_useless_late_inlines(&_string_late_inlines, useful);
  remove_useless_late_inlines(&_boxing_late_inlines, useful);
  remove_useless_late_inlines(&_late_inlines, useful);
  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}

//------------------------------frame_size_in_words-----------------------------
// frame_slots in units of words
int Compile::frame_size_in_words() const {
  // shift is 0 in LP32 and 1 in LP64
  const int shift = (LogBytesPerWord - LogBytesPerInt);
  int words = _frame_slots >> shift;
  assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
  return words;
}

// To bang the stack of this compiled method we use the stack size
// that the interpreter would need in case of a deoptimization. This
// removes the need to bang the stack in the deoptimization blob which
// in turn simplifies stack overflow handling.
int Compile::bang_size_in_bytes() const {
  return MAX2(_interpreter_frame_size, frame_size_in_bytes());
}
// ============================================================================
//------------------------------CompileWrapper---------------------------------
// RAII helper: registers the Compile with the current ciEnv on construction
// and tears everything down (scratch blob, compiler_data) on destruction,
// even on early-return/bailout paths.
class CompileWrapper : public StackObj {
  Compile *const _compile;
 public:
  CompileWrapper(Compile* compile);

  ~CompileWrapper();
};

CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
  // the Compile* pointer is stored in the current ciEnv:
  ciEnv* env = compile->env();
  assert(env == ciEnv::current(), "must already be a ciEnv active");
  assert(env->compiler_data() == NULL, "compile already active?");
  env->set_compiler_data(compile);
  assert(compile == Compile::current(), "sanity");

  // Reset per-compilation type/regalloc caches before the type system is
  // (re)initialized for this compile.
  compile->set_type_dict(NULL);
  compile->set_type_hwm(NULL);
  compile->set_type_last_size(0);
  compile->set_last_tf(NULL, NULL);
  compile->set_indexSet_arena(NULL);
  compile->set_indexSet_free_block_list(NULL);
  compile->init_type_arena();
  Type::Initialize(compile);
  _compile->set_scratch_buffer_blob(NULL);
  _compile->begin_method();
}
CompileWrapper::~CompileWrapper() {
  _compile->end_method();
  if (_compile->scratch_buffer_blob() != NULL)
    BufferBlob::free(_compile->scratch_buffer_blob());
  _compile->env()->set_compiler_data(NULL);
}


//----------------------------print_compile_messages---------------------------
// Diagnostic chatter controlled by PrintOpto / break_at_compile; no effect
// on the compilation itself.
void Compile::print_compile_messages() {
#ifndef PRODUCT
  // Check if recompiling
  if (_subsume_loads == false && PrintOpto) {
    // Recompiling without allowing machine instructions to subsume loads
    tty->print_cr("*********************************************************");
    tty->print_cr("** Bailout: Recompile without subsuming loads          **");
    tty->print_cr("*********************************************************");
  }
  if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
    // Recompiling without escape analysis
    tty->print_cr("*********************************************************");
    tty->print_cr("** Bailout: Recompile without escape analysis          **");
    tty->print_cr("*********************************************************");
  }
  if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {
    // Recompiling without boxing elimination
    tty->print_cr("*********************************************************");
    tty->print_cr("** Bailout: Recompile without boxing elimination       **");
    tty->print_cr("*********************************************************");
  }
  if (env()->break_at_compile()) {
    // Open the debugger when compiling this method.
    tty->print("### Breaking when compiling: ");
    method()->print_short_name();
    tty->cr();
    BREAKPOINT;
  }

  if( PrintOpto ) {
    if (is_osr_compilation()) {
      tty->print("[OSR]%3d", _compile_id);
    } else {
      tty->print("%3d", _compile_id);
    }
  }
#endif
}


//-----------------------init_scratch_buffer_blob------------------------------
// Construct a temporary BufferBlob and cache it for this compile.
void Compile::init_scratch_buffer_blob(int const_size) {
  // If there is already a scratch buffer blob allocated and the
  // constant section is big enough, use it.  Otherwise free the
  // current and allocate a new one.
  BufferBlob* blob = scratch_buffer_blob();
  if ((blob != NULL) && (const_size <= _scratch_const_size)) {
    // Use the current blob.
  } else {
    if (blob != NULL) {
      BufferBlob::free(blob);
    }

    ResourceMark rm;
    _scratch_const_size = const_size;
    int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
    blob = BufferBlob::create("Compile::scratch_buffer", size);
    // Record the buffer blob for next time.
    set_scratch_buffer_blob(blob);
    // Have we run out of code space?
    if (scratch_buffer_blob() == NULL) {
      // Let CompilerBroker disable further compilations.
      record_failure("Not enough space for scratch buffer in CodeCache");
      return;
    }
  }

  // Initialize the relocation buffers: the relocation area is carved out
  // of the tail end of the blob's content.
  relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
  set_scratch_locs_memory(locs_buf);
}


//-----------------------scratch_emit_size-------------------------------------
// Helper function that computes size by emitting code
uint Compile::scratch_emit_size(const Node* n) {
  // Start scratch_emit_size section.
  set_in_scratch_emit_size(true);

  // Emit into a trash buffer and count bytes emitted.
  // This is a pretty expensive way to compute a size,
  // but it works well enough if seldom used.
  // All common fixed-size instructions are given a size
  // method by the AD file.
  // Note that the scratch buffer blob and locs memory are
  // allocated at the beginning of the compile task, and
  // may be shared by several calls to scratch_emit_size.
  // The allocation of the scratch buffer blob is particularly
  // expensive, since it has to grab the code cache lock.
  BufferBlob* blob = this->scratch_buffer_blob();
  assert(blob != NULL, "Initialize BufferBlob at start");
  assert(blob->size() > MAX_inst_size, "sanity");
  relocInfo* locs_buf = scratch_locs_memory();
  address blob_begin = blob->content_begin();
  address blob_end   = (address)locs_buf;
  assert(blob->content_contains(blob_end), "sanity");
  CodeBuffer buf(blob_begin, blob_end - blob_begin);
  buf.initialize_consts_size(_scratch_const_size);
  buf.initialize_stubs_size(MAX_stubs_size);
  assert(locs_buf != NULL, "sanity");
  // Split the relocation buffer into equal thirds for consts/insts/stubs.
  int lsize = MAX_locs_size / 3;
  buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
  buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
  buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);

  // Do the emission.

  Label fakeL; // Fake label for branch instructions.
  Label* saveL = NULL;
  uint save_bnum = 0;
  bool is_branch = n->is_MachBranch();
  if (is_branch) {
    // Branches need a bound label to emit; substitute a fake one and
    // restore the real label below.
    MacroAssembler masm(&buf);
    masm.bind(fakeL);
    n->as_MachBranch()->save_label(&saveL, &save_bnum);
    n->as_MachBranch()->label_set(&fakeL, 0);
  }
  n->emit(buf, this->regalloc());

  // Emitting into the scratch buffer should not fail
  assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));

  if (is_branch) // Restore label.
    n->as_MachBranch()->label_set(saveL, save_bnum);

  // End scratch_emit_size section.
  set_in_scratch_emit_size(false);

  return buf.insts_size();
}
(address)locs_buf;590assert(blob->content_contains(blob_end), "sanity");591CodeBuffer buf(blob_begin, blob_end - blob_begin);592buf.initialize_consts_size(_scratch_const_size);593buf.initialize_stubs_size(MAX_stubs_size);594assert(locs_buf != NULL, "sanity");595int lsize = MAX_locs_size / 3;596buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);597buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);598buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);599600// Do the emission.601602Label fakeL; // Fake label for branch instructions.603Label* saveL = NULL;604uint save_bnum = 0;605bool is_branch = n->is_MachBranch();606if (is_branch) {607MacroAssembler masm(&buf);608masm.bind(fakeL);609n->as_MachBranch()->save_label(&saveL, &save_bnum);610n->as_MachBranch()->label_set(&fakeL, 0);611}612n->emit(buf, this->regalloc());613614// Emitting into the scratch buffer should not fail615assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));616617if (is_branch) // Restore label.618n->as_MachBranch()->label_set(saveL, save_bnum);619620// End scratch_emit_size section.621set_in_scratch_emit_size(false);622623return buf.insts_size();624}625626627// ============================================================================628//------------------------------Compile standard-------------------------------629debug_only( int Compile::_debug_idx = 100000; )630631// Compile a method. 
entry_bci is -1 for normal compilations and indicates632// the continuation bci for on stack replacement.633634635Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,636bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )637: Phase(Compiler),638_env(ci_env),639_log(ci_env->log()),640_compile_id(ci_env->compile_id()),641_save_argument_registers(false),642_stub_name(NULL),643_stub_function(NULL),644_stub_entry_point(NULL),645_method(target),646_entry_bci(osr_bci),647_initial_gvn(NULL),648_for_igvn(NULL),649_warm_calls(NULL),650_subsume_loads(subsume_loads),651_do_escape_analysis(do_escape_analysis),652_eliminate_boxing(eliminate_boxing),653_failure_reason(NULL),654_code_buffer("Compile::Fill_buffer"),655_orig_pc_slot(0),656_orig_pc_slot_offset_in_bytes(0),657_has_method_handle_invokes(false),658_mach_constant_base_node(NULL),659_node_bundling_limit(0),660_node_bundling_base(NULL),661_java_calls(0),662_inner_loops(0),663_scratch_const_size(-1),664_in_scratch_emit_size(false),665_dead_node_list(comp_arena()),666_dead_node_count(0),667#ifndef PRODUCT668_trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),669_in_dump_cnt(0),670_printer(IdealGraphPrinter::printer()),671#endif672_congraph(NULL),673_comp_arena(mtCompiler),674_node_arena(mtCompiler),675_old_arena(mtCompiler),676_Compile_types(mtCompiler),677_replay_inline_data(NULL),678_late_inlines(comp_arena(), 2, 0, NULL),679_string_late_inlines(comp_arena(), 2, 0, NULL),680_boxing_late_inlines(comp_arena(), 2, 0, NULL),681_late_inlines_pos(0),682_number_of_mh_late_inlines(0),683_inlining_progress(false),684_inlining_incrementally(false),685_print_inlining_list(NULL),686_print_inlining_idx(0),687_interpreter_frame_size(0),688_max_node_limit(MaxNodeLimit) {689C = this;690691CompileWrapper cw(this);692#ifndef PRODUCT693if (TimeCompiler2) {694tty->print(" 
");695target->holder()->name()->print();696tty->print(".");697target->print_short_name();698tty->print(" ");699}700TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);701TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);702bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");703if (!print_opto_assembly) {704bool print_assembly = (PrintAssembly || _method->should_print_assembly());705if (print_assembly && !Disassembler::can_decode()) {706tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");707print_opto_assembly = true;708}709}710set_print_assembly(print_opto_assembly);711set_parsed_irreducible_loop(false);712713if (method()->has_option("ReplayInline")) {714_replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());715}716#endif717set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));718set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));719set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it720721if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {722// Make sure the method being compiled gets its own MDO,723// so we can at least track the decompile_count().724// Need MDO to record RTM code generation state.725method()->ensure_method_data();726}727728Init(::AliasLevel);729730731print_compile_messages();732733_ilt = InlineTree::build_inline_tree_root();734735// Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice736assert(num_alias_types() >= AliasIdxRaw, "");737738#define MINIMUM_NODE_HASH 1023739// Node list that Iterative GVN will start with740Unique_Node_List for_igvn(comp_arena());741set_for_igvn(&for_igvn);742743// GVN that will be run immediately on new nodes744uint estimated_size = method()->code_size()*4+64;745estimated_size = (estimated_size < MINIMUM_NODE_HASH ? 
MINIMUM_NODE_HASH : estimated_size);746PhaseGVN gvn(node_arena(), estimated_size);747set_initial_gvn(&gvn);748749if (print_inlining() || print_intrinsics()) {750_print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());751}752{ // Scope for timing the parser753TracePhase t3("parse", &_t_parser, true);754755// Put top into the hash table ASAP.756initial_gvn()->transform_no_reclaim(top());757758// Set up tf(), start(), and find a CallGenerator.759CallGenerator* cg = NULL;760if (is_osr_compilation()) {761const TypeTuple *domain = StartOSRNode::osr_domain();762const TypeTuple *range = TypeTuple::make_range(method()->signature());763init_tf(TypeFunc::make(domain, range));764StartNode* s = new (this) StartOSRNode(root(), domain);765initial_gvn()->set_type_bottom(s);766init_start(s);767cg = CallGenerator::for_osr(method(), entry_bci());768} else {769// Normal case.770init_tf(TypeFunc::make(method()));771StartNode* s = new (this) StartNode(root(), tf()->domain());772initial_gvn()->set_type_bottom(s);773init_start(s);774if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {775// With java.lang.ref.reference.get() we must go through the776// intrinsic when G1 is enabled - even when get() is the root777// method of the compile - so that, if necessary, the value in778// the referent field of the reference object gets recorded by779// the pre-barrier code.780// Specifically, if G1 is enabled, the value in the referent781// field is recorded by the G1 SATB pre barrier. 
This will782// result in the referent being marked live and the reference783// object removed from the list of discovered references during784// reference processing.785cg = find_intrinsic(method(), false);786}787if (cg == NULL) {788float past_uses = method()->interpreter_invocation_count();789float expected_uses = past_uses;790cg = CallGenerator::for_inline(method(), expected_uses);791}792}793if (failing()) return;794if (cg == NULL) {795record_method_not_compilable_all_tiers("cannot parse method");796return;797}798JVMState* jvms = build_start_state(start(), tf());799if ((jvms = cg->generate(jvms)) == NULL) {800if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {801record_method_not_compilable("method parse failed");802}803return;804}805GraphKit kit(jvms);806807if (!kit.stopped()) {808// Accept return values, and transfer control we know not where.809// This is done by a special, unique ReturnNode bound to root.810return_values(kit.jvms());811}812813if (kit.has_exceptions()) {814// Any exceptions that escape from this call must be rethrown815// to whatever caller is dynamically above us on the stack.816// This is done by a special, unique RethrowNode bound to root.817rethrow_exceptions(kit.transfer_exceptions_into_jvms());818}819820assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");821822if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {823inline_string_calls(true);824}825826if (failing()) return;827828print_method(PHASE_BEFORE_REMOVEUSELESS, 3);829830// Remove clutter produced by parsing.831if (!failing()) {832ResourceMark rm;833PhaseRemoveUseless pru(initial_gvn(), &for_igvn);834}835}836837// Note: Large methods are capped off in do_one_bytecode().838if (failing()) return;839840// After parsing, node notes are no longer automagic.841// They must be propagated by register_new_node_with_optimizer(),842// clone(), or the 
like.843set_default_node_notes(NULL);844845for (;;) {846int successes = Inline_Warm();847if (failing()) return;848if (successes == 0) break;849}850851// Drain the list.852Finish_Warm();853#ifndef PRODUCT854if (_printer) {855_printer->print_inlining(this);856}857#endif858859if (failing()) return;860NOT_PRODUCT( verify_graph_edges(); )861862// Now optimize863Optimize();864if (failing()) return;865NOT_PRODUCT( verify_graph_edges(); )866867#ifndef PRODUCT868if (PrintIdeal) {869ttyLocker ttyl; // keep the following output all in one block870// This output goes directly to the tty, not the compiler log.871// To enable tools to match it up with the compilation activity,872// be sure to tag this tty output with the compile ID.873if (xtty != NULL) {874xtty->head("ideal compile_id='%d'%s", compile_id(),875is_osr_compilation() ? " compile_kind='osr'" :876"");877}878root()->dump(9999);879if (xtty != NULL) {880xtty->tail("ideal");881}882}883#endif884885NOT_PRODUCT( verify_barriers(); )886887// Dump compilation data to replay it.888if (method()->has_option("DumpReplay")) {889env()->dump_replay_data(_compile_id);890}891if (method()->has_option("DumpInline") && (ilt() != NULL)) {892env()->dump_inline_data(_compile_id);893}894895// Now that we know the size of all the monitors we can add a fixed slot896// for the original deopt pc.897898_orig_pc_slot = fixed_slots();899int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);900set_fixed_slots(next_slot);901902// Compute when to use implicit null checks. 
// Used by matching trap based
// nodes and NullCheck optimization.
set_allowed_deopt_reasons();

// Now generate code
Code_Gen();
if (failing())  return;

// Check if we want to skip execution of all compiled code.
{
#ifndef PRODUCT
  if (OptoNoExecute) {
    record_method_not_compilable("+OptoNoExecute");  // Flag as failed
    return;
  }
  // NOTE: this timer only exists in non-product builds (it is inside
  // the #ifndef PRODUCT block), so "install_code" is not timed in product.
  TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
#endif

  // Record where the verified (and, for OSR, the OSR) entry points live
  // in the generated code buffer.
  if (is_osr_compilation()) {
    _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
    _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
  } else {
    _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
    _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
  }

  // Hand the finished code buffer to the CI environment, which creates and
  // installs the nmethod.
  env()->register_method(_method, _entry_bci,
                         &_code_offsets,
                         _orig_pc_slot_offset_in_bytes,
                         code_buffer(),
                         frame_size_in_words(), _oop_map_set,
                         &_handler_table, &_inc_table,
                         compiler,
                         env()->comp_level(),
                         has_unsafe_access(),
                         SharedRuntime::is_wide_vector(max_vector_size()),
                         rtm_state()
                         );

  if (log() != NULL) // Print code cache state into compiler log
    log()->code_cache_state();
}
}

//------------------------------Compile----------------------------------------
// Compile a runtime stub.  Unlike the method-compilation constructor above,
// this variant has no Java method: the ideal graph is produced directly by
// GraphKit::gen_stub from the TypeFunc supplied by 'generator'.
Compile::Compile( ciEnv* ci_env,
                  TypeFunc_generator generator,
                  address stub_function,
                  const char *stub_name,
                  int is_fancy_jump,
                  bool pass_tls,
                  bool save_arg_registers,
                  bool return_pc )
  : Phase(Compiler),
    _env(ci_env),
    _log(ci_env->log()),
    _compile_id(0),
    _save_argument_registers(save_arg_registers),
    _method(NULL),                      // stubs have no Java method
    _stub_name(stub_name),
    _stub_function(stub_function),
    _stub_entry_point(NULL),
    _entry_bci(InvocationEntryBci),
    _initial_gvn(NULL),
    _for_igvn(NULL),
    _warm_calls(NULL),
    _orig_pc_slot(0),
    _orig_pc_slot_offset_in_bytes(0),
    _subsume_loads(true),
    _do_escape_analysis(false),         // no EA for stubs
    _eliminate_boxing(false),
    _failure_reason(NULL),
    _code_buffer("Compile::Fill_buffer"),
    _has_method_handle_invokes(false),
    _mach_constant_base_node(NULL),
    _node_bundling_limit(0),
    _node_bundling_base(NULL),
    _java_calls(0),
    _inner_loops(0),
#ifndef PRODUCT
    _trace_opto_output(TraceOptoOutput),
    _in_dump_cnt(0),
    _printer(NULL),
#endif
    _comp_arena(mtCompiler),
    _node_arena(mtCompiler),
    _old_arena(mtCompiler),
    _Compile_types(mtCompiler),
    _dead_node_list(comp_arena()),
    _dead_node_count(0),
    _congraph(NULL),
    _replay_inline_data(NULL),
    _number_of_mh_late_inlines(0),
    _inlining_progress(false),
    _inlining_incrementally(false),
    _print_inlining_list(NULL),
    _print_inlining_idx(0),
    _allowed_reasons(0),
    _interpreter_frame_size(0),
    _max_node_limit(MaxNodeLimit) {
  C = this;   // make this Compile the thread-current compilation

#ifndef PRODUCT
  TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
  TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
  set_print_assembly(PrintFrameConverterAssembly);
  set_parsed_irreducible_loop(false);
#endif
  set_has_irreducible_loop(false); // no loops

  CompileWrapper cw(this);
  Init(/*AliasLevel=*/ 0);          // stubs use the flattest alias level
  init_tf((*generator)());

  {
    // The following is a dummy for the sake of GraphKit::gen_stub
    Unique_Node_List for_igvn(comp_arena());
    set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
    PhaseGVN gvn(Thread::current()->resource_area(),255);
    set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
    gvn.transform_no_reclaim(top());

    GraphKit kit;
    kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
  }

  NOT_PRODUCT( verify_graph_edges(); )
  Code_Gen();
  if (failing())  return;


  // Entry point will be accessed using compile->stub_entry_point();
  if (code_buffer() == NULL) {
    Matcher::soft_match_failure();
  } else {
    if (PrintAssembly && (WizardMode || Verbose))
      tty->print_cr("### Stub::%s", stub_name);

    if (!failing()) {
      assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");

      // Make the NMethod
      // For now we mark the frame as never safe for profile stackwalking
      RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
                                                      code_buffer(),
                                                      CodeOffsets::frame_never_safe,
                                                      // _code_offsets.value(CodeOffsets::Frame_Complete),
                                                      frame_size_in_words(),
                                                      _oop_map_set,
                                                      save_arg_registers);
      assert(rs != NULL && rs->is_runtime_stub(), "sanity check");

      _stub_entry_point = rs->entry_point();
    }
  }
}

//------------------------------Init-------------------------------------------
// Prepare for a single compilation: reset all per-compilation state.
void Compile::Init(int aliaslevel) {
  _unique  = 0;
  _regalloc = NULL;

  _tf      = NULL;  // filled in later
  _top     = NULL;  // cached later
  _matcher = NULL;  // filled in later
  _cfg     = NULL;  // filled in later

  set_24_bit_selection_and_mode(Use24BitFP, false);

  _node_note_array = NULL;
  _default_node_notes = NULL;

  _immutable_memory = NULL; // filled in at first inquiry

  // Globally visible Nodes
  // First set TOP to NULL to give safe behavior during creation of RootNode
  set_cached_top_node(NULL);
  set_root(new (this) RootNode());
  // Now that you have a Root to point to, create the real TOP
  set_cached_top_node( new (this) ConNode(Type::TOP) );
  set_recent_alloc(NULL, NULL);

  // Create Debug Information Recorder to record scopes, oopmaps, etc.
etc.1087env()->set_oop_recorder(new OopRecorder(env()->arena()));1088env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));1089env()->set_dependencies(new Dependencies(env()));10901091_fixed_slots = 0;1092set_has_split_ifs(false);1093set_has_loops(has_method() && method()->has_loops()); // first approximation1094set_has_stringbuilder(false);1095set_has_boxed_value(false);1096_trap_can_recompile = false; // no traps emitted yet1097_major_progress = true; // start out assuming good things will happen1098set_has_unsafe_access(false);1099set_max_vector_size(0);1100Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));1101set_decompile_count(0);11021103set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));1104set_num_loop_opts(LoopOptsCount);1105set_do_inlining(Inline);1106set_max_inline_size(MaxInlineSize);1107set_freq_inline_size(FreqInlineSize);1108set_do_scheduling(OptoScheduling);1109set_do_count_invocations(false);1110set_do_method_data_update(false);1111set_rtm_state(NoRTM); // No RTM lock eliding by default1112method_has_option_value("MaxNodeLimit", _max_node_limit);1113#if INCLUDE_RTM_OPT1114if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {1115int rtm_state = method()->method_data()->rtm_state();1116if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {1117// Don't generate RTM lock eliding code.1118set_rtm_state(NoRTM);1119} else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {1120// Generate RTM lock eliding code without abort ratio calculation code.1121set_rtm_state(UseRTM);1122} else if (UseRTMDeopt) {1123// Generate RTM lock eliding code and include abort ratio calculation1124// code if UseRTMDeopt is on.1125set_rtm_state(ProfileRTM);1126}1127}1128#endif1129if (debug_info()->recording_non_safepoints()) {1130set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>1131(comp_arena(), 8, 0, 
NULL));1132set_default_node_notes(Node_Notes::make(this));1133}11341135// // -- Initialize types before each compile --1136// // Update cached type information1137// if( _method && _method->constants() )1138// Type::update_loaded_types(_method, _method->constants());11391140// Init alias_type map.1141if (!_do_escape_analysis && aliaslevel == 3)1142aliaslevel = 2; // No unique types without escape analysis1143_AliasLevel = aliaslevel;1144const int grow_ats = 16;1145_max_alias_types = grow_ats;1146_alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);1147AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);1148Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);1149{1150for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];1151}1152// Initialize the first few types.1153_alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);1154_alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);1155_alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);1156_num_alias_types = AliasIdxRaw+1;1157// Zero out the alias type cache.1158Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));1159// A NULL adr_type hits in the cache right away. 
Preload the right answer.1160probe_alias_cache(NULL)->_index = AliasIdxTop;11611162_intrinsics = NULL;1163_macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1164_predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1165_expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1166_range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1167register_library_intrinsics();1168#ifdef ASSERT1169_type_verify_symmetry = true;1170#endif1171}11721173//---------------------------init_start----------------------------------------1174// Install the StartNode on this compile object.1175void Compile::init_start(StartNode* s) {1176if (failing())1177return; // already failing1178assert(s == start(), "");1179}11801181StartNode* Compile::start() const {1182assert(!failing(), "");1183for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {1184Node* start = root()->fast_out(i);1185if( start->is_Start() )1186return start->as_Start();1187}1188fatal("Did not find Start node!");1189return NULL;1190}11911192//-------------------------------immutable_memory-------------------------------------1193// Access immutable memory1194Node* Compile::immutable_memory() {1195if (_immutable_memory != NULL) {1196return _immutable_memory;1197}1198StartNode* s = start();1199for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {1200Node *p = s->fast_out(i);1201if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {1202_immutable_memory = p;1203return _immutable_memory;1204}1205}1206ShouldNotReachHere();1207return NULL;1208}12091210//----------------------set_cached_top_node------------------------------------1211// Install the cached top node, and make sure Node::is_top works correctly.1212void Compile::set_cached_top_node(Node* tn) {1213if (tn != NULL) verify_top(tn);1214Node* old_top = _top;1215_top = tn;1216// Calling Node::setup_is_top allows the nodes the chance to 
adjust1217// their _out arrays.1218if (_top != NULL) _top->setup_is_top();1219if (old_top != NULL) old_top->setup_is_top();1220assert(_top == NULL || top()->is_top(), "");1221}12221223#ifdef ASSERT1224uint Compile::count_live_nodes_by_graph_walk() {1225Unique_Node_List useful(comp_arena());1226// Get useful node list by walking the graph.1227identify_useful_nodes(useful);1228return useful.size();1229}12301231void Compile::print_missing_nodes() {12321233// Return if CompileLog is NULL and PrintIdealNodeCount is false.1234if ((_log == NULL) && (! PrintIdealNodeCount)) {1235return;1236}12371238// This is an expensive function. It is executed only when the user1239// specifies VerifyIdealNodeCount option or otherwise knows the1240// additional work that needs to be done to identify reachable nodes1241// by walking the flow graph and find the missing ones using1242// _dead_node_list.12431244Unique_Node_List useful(comp_arena());1245// Get useful node list by walking the graph.1246identify_useful_nodes(useful);12471248uint l_nodes = C->live_nodes();1249uint l_nodes_by_walk = useful.size();12501251if (l_nodes != l_nodes_by_walk) {1252if (_log != NULL) {1253_log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));1254_log->stamp();1255_log->end_head();1256}1257VectorSet& useful_member_set = useful.member_set();1258int last_idx = l_nodes_by_walk;1259for (int i = 0; i < last_idx; i++) {1260if (useful_member_set.test(i)) {1261if (_dead_node_list.test(i)) {1262if (_log != NULL) {1263_log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);1264}1265if (PrintIdealNodeCount) {1266// Print the log message to tty1267tty->print_cr("mismatched_node idx='%d' both live and dead'", i);1268useful.at(i)->dump();1269}1270}1271}1272else if (! 
_dead_node_list.test(i)) {1273if (_log != NULL) {1274_log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);1275}1276if (PrintIdealNodeCount) {1277// Print the log message to tty1278tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);1279}1280}1281}1282if (_log != NULL) {1283_log->tail("mismatched_nodes");1284}1285}1286}1287#endif12881289#ifndef PRODUCT1290void Compile::verify_top(Node* tn) const {1291if (tn != NULL) {1292assert(tn->is_Con(), "top node must be a constant");1293assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");1294assert(tn->in(0) != NULL, "must have live top node");1295}1296}1297#endif129812991300///-------------------Managing Per-Node Debug & Profile Info-------------------13011302void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {1303guarantee(arr != NULL, "");1304int num_blocks = arr->length();1305if (grow_by < num_blocks) grow_by = num_blocks;1306int num_notes = grow_by * _node_notes_block_size;1307Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);1308Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));1309while (num_notes > 0) {1310arr->append(notes);1311notes += _node_notes_block_size;1312num_notes -= _node_notes_block_size;1313}1314assert(num_notes == 0, "exact multiple, please");1315}13161317bool Compile::copy_node_notes_to(Node* dest, Node* source) {1318if (source == NULL || dest == NULL) return false;13191320if (dest->is_Con())1321return false; // Do not push debug info onto constants.13221323#ifdef ASSERT1324// Leave a bread crumb trail pointing to the original node:1325if (dest != NULL && dest != source && dest->debug_orig() == NULL) {1326dest->set_debug_orig(source);1327}1328#endif13291330if (node_note_array() == NULL)1331return false; // Not collecting any notes now.13321333// This is a copy onto a pre-existing node, which may already have notes.1334// If both nodes have notes, do not overwrite any 
// pre-existing notes.
Node_Notes* source_notes = node_notes_at(source->_idx);
if (source_notes == NULL || source_notes->is_clear())  return false;
Node_Notes* dest_notes   = node_notes_at(dest->_idx);
if (dest_notes == NULL || dest_notes->is_clear()) {
  return set_node_notes_at(dest->_idx, source_notes);
}

Node_Notes merged_notes = (*source_notes);
// The order of operations here ensures that dest notes will win...
merged_notes.update_from(dest_notes);
return set_node_notes_at(dest->_idx, &merged_notes);
}


//--------------------------allow_range_check_smearing-------------------------
// Gating condition for coalescing similar range checks.
// Sometimes we try 'speculatively' replacing a series of a range checks by a
// single covering check that is at least as strong as any of them.
// If the optimization succeeds, the simplified (strengthened) range check
// will always succeed.  If it fails, we will deopt, and then give up
// on the optimization.
bool Compile::allow_range_check_smearing() const {
  // If this method has already thrown a range-check,
  // assume it was because we already tried range smearing
  // and it failed.
  uint already_trapped = trap_count(Deoptimization::Reason_range_check);
  return !already_trapped;
}


//------------------------------flatten_alias_type-----------------------------
// Canonicalize an address type for alias analysis: map all address types that
// must share an alias class onto one representative type.  The result must be
// idempotent (find_alias_type asserts flatten(flatten(t)) == flatten(t)).
const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
  int offset = tj->offset();
  TypePtr::PTR ptr = tj->ptr();

  // Known instance (scalarizable allocation) alias only with itself.
  bool is_known_inst = tj->isa_oopptr() != NULL &&
                       tj->is_oopptr()->is_known_instance();

  // Process weird unsafe references.
  if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
    assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
    assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
    tj = TypeOopPtr::BOTTOM;
    ptr = tj->ptr();
    offset = tj->offset();
  }

  // Array pointers need some flattening
  const TypeAryPtr *ta = tj->isa_aryptr();
  if (ta && ta->is_stable()) {
    // Erase stability property for alias analysis.
    tj = ta = ta->cast_to_stable(false);
  }
  if( ta && is_known_inst ) {
    if ( offset != Type::OffsetBot &&
         offset > arrayOopDesc::length_offset_in_bytes() ) {
      offset = Type::OffsetBot; // Flatten constant access into array body only
      tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
    }
  } else if( ta && _AliasLevel >= 2 ) {
    // For arrays indexed by constant indices, we flatten the alias
    // space to include all of the array body.  Only the header, klass
    // and array length can be accessed un-aliased.
    if( offset != Type::OffsetBot ) {
      if( ta->const_oop() ) { // MethodData* or Method*
        offset = Type::OffsetBot;   // Flatten constant access into array body
        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
      } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
        // range is OK as-is.
        tj = ta = TypeAryPtr::RANGE;
      } else if( offset == oopDesc::klass_offset_in_bytes() ) {
        tj = TypeInstPtr::KLASS; // all klass loads look alike
        ta = TypeAryPtr::RANGE; // generic ignored junk
        ptr = TypePtr::BotPTR;
      } else if( offset == oopDesc::mark_offset_in_bytes() ) {
        tj = TypeInstPtr::MARK;
        ta = TypeAryPtr::RANGE; // generic ignored junk
        ptr = TypePtr::BotPTR;
      } else {                  // Random constant offset into array body
        offset = Type::OffsetBot;   // Flatten constant access into array body
        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
      }
    }
    // Arrays of fixed size alias with arrays of unknown size.
    if (ta->size() != TypeInt::POS) {
      const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
    }
    // Arrays of known objects become arrays of unknown objects.
    if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
      const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
    }
    if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
      const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
    }
    // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
    // cannot be distinguished by bytecode alone.
    if (ta->elem() == TypeInt::BOOL) {
      const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
      ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
    }
    // During the 2nd round of IterGVN, NotNull castings are removed.
    // Make sure the Bottom and NotNull variants alias the same.
    // Also, make sure exact and non-exact variants alias the same.
    if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {
      tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
    }
  }

  // Oop pointers need some flattening
  const TypeInstPtr *to = tj->isa_instptr();
  if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
    ciInstanceKlass *k = to->klass()->as_instance_klass();
    if( ptr == TypePtr::Constant ) {
      if (to->klass() != ciEnv::current()->Class_klass() ||
          offset < k->size_helper() * wordSize) {
        // No constant oop pointers (such as Strings); they alias with
        // unknown strings.
        assert(!is_known_inst, "not scalarizable allocation");
        tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
      }
    } else if( is_known_inst ) {
      tj = to; // Keep NotNull and klass_is_exact for instance type
    } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
      // During the 2nd round of IterGVN, NotNull castings are removed.
      // Make sure the Bottom and NotNull variants alias the same.
      // Also, make sure exact and non-exact variants alias the same.
      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
    }
    if (to->speculative() != NULL) {
      // Drop speculative part; it must not influence aliasing.
      tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());
    }
    // Canonicalize the holder of this field
    if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
      // First handle header references such as a LoadKlassNode, even if the
      // object's klass is unloaded at compile time (4965979).
      if (!is_known_inst) { // Do it only for non-instance types
        tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
      }
    } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
      // Static fields are in the space above the normal instance
      // fields in the java.lang.Class instance.
      if (to->klass() != ciEnv::current()->Class_klass()) {
        to = NULL;
        tj = TypeOopPtr::BOTTOM;
        offset = tj->offset();
      }
    } else {
      ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
      if (!k->equals(canonical_holder) || tj->offset() != offset) {
        if( is_known_inst ) {
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
        } else {
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
        }
      }
    }
  }

  // Klass pointers to object array klasses need some flattening
  const TypeKlassPtr *tk = tj->isa_klassptr();
  if( tk ) {
    // If we are referencing a field within a Klass, we need
    // to assume the worst case of an Object.  Both exact and
    // inexact types must flatten to the same alias class so
    // use NotNull as the PTR.
    if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {

      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
                                   TypeKlassPtr::OBJECT->klass(),
                                   offset);
    }

    ciKlass* klass = tk->klass();
    if( klass->is_obj_array_klass() ) {
      ciKlass* k = TypeAryPtr::OOPS->klass();
      if( !k || !k->is_loaded() )                  // Only fails for some -Xcomp runs
        k = TypeInstPtr::BOTTOM->klass();
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
    }

    // Check for precise loads from the primary supertype array and force them
    // to the supertype cache alias index.  Check for generic array loads from
    // the primary supertype array and also force them to the supertype cache
    // alias index.  Since the same load can reach both, we need to merge
    // these 2 disparate memories into the same alias class.  Since the
    // primary supertype array is read-only, there's no chance of confusion
    // where we bypass an array load and an array store.
    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
    if (offset == Type::OffsetBot ||
        (offset >= primary_supers_offset &&
         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
      offset = in_bytes(Klass::secondary_super_cache_offset());
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
    }
  }

  // Flatten all Raw pointers together.
  if (tj->base() == Type::RawPtr)
    tj = TypeRawPtr::BOTTOM;

  if (tj->base() == Type::AnyPtr)
    tj = TypePtr::BOTTOM;      // An error, which the caller must check for.

  // Flatten all to bottom for now
  switch( _AliasLevel ) {
  case 0:
    tj = TypePtr::BOTTOM;
    break;
  case 1:                       // Flatten to: oop, static, field or array
    switch (tj->base()) {
    //case Type::AryPtr: tj = TypeAryPtr::RANGE;    break;
    case Type::RawPtr:   tj = TypeRawPtr::BOTTOM;   break;
    case Type::AryPtr:   // do not distinguish arrays at all
    case Type::InstPtr:  tj = TypeInstPtr::BOTTOM;  break;
    case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
    case Type::AnyPtr:   tj = TypePtr::BOTTOM;      break;  // caller checks it
    default: ShouldNotReachHere();
    }
    break;
  case 2:                       // No collapsing at level 2; keep all splits
  case 3:                       // No collapsing at level 3; keep all splits
    break;
  default:
    Unimplemented();
  }

  offset = tj->offset();
  assert( offset != Type::OffsetTop, "Offset has fallen from constant" );

  assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
          (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
          (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
          (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
          (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
          (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
          (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr)  ,
          "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
  assert( tj->ptr() != TypePtr::TopPTR &&
          tj->ptr() != TypePtr::AnyNull &&
          tj->ptr() != TypePtr::Null, "No imprecise addresses" );
//    assert( tj->ptr() != TypePtr::Constant ||
//            tj->base() == Type::RawPtr ||
//            tj->base() == Type::KlassPtr, "No constant oop addresses" );

  return tj;
}

// Initialize one AliasType slot: index, flattened address type, and the
// index of the corresponding "general" (non-instance) alias class.
void Compile::AliasType::Init(int i, const TypePtr* at) {
  _index = i;
  _adr_type = at;
  _field = NULL;
  _element = NULL;
  _is_rewritable = true; // default
  const TypeOopPtr *atoop = (at != NULL) ?
at->isa_oopptr() : NULL;1595if (atoop != NULL && atoop->is_known_instance()) {1596const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);1597_general_index = Compile::current()->get_alias_index(gt);1598} else {1599_general_index = 0;1600}1601}16021603BasicType Compile::AliasType::basic_type() const {1604if (element() != NULL) {1605const Type* element = adr_type()->is_aryptr()->elem();1606return element->isa_narrowoop() ? T_OBJECT : element->array_element_basic_type();1607} if (field() != NULL) {1608return field()->layout_type();1609} else {1610return T_ILLEGAL; // unknown1611}1612}16131614//---------------------------------print_on------------------------------------1615#ifndef PRODUCT1616void Compile::AliasType::print_on(outputStream* st) {1617if (index() < 10)1618st->print("@ <%d> ", index());1619else st->print("@ <%d>", index());1620st->print(is_rewritable() ? " " : " RO");1621int offset = adr_type()->offset();1622if (offset == Type::OffsetBot)1623st->print(" +any");1624else st->print(" +%-3d", offset);1625st->print(" in ");1626adr_type()->dump_on(st);1627const TypeOopPtr* tjp = adr_type()->isa_oopptr();1628if (field() != NULL && tjp) {1629if (tjp->klass() != field()->holder() ||1630tjp->offset() != field()->offset_in_bytes()) {1631st->print(" != ");1632field()->print();1633st->print(" ***");1634}1635}1636}16371638void print_alias_types() {1639Compile* C = Compile::current();1640tty->print_cr("--- Alias types, AliasIdxBot .. 
%d", C->num_alias_types()-1);1641for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {1642C->alias_type(idx)->print_on(tty);1643tty->cr();1644}1645}1646#endif164716481649//----------------------------probe_alias_cache--------------------------------1650Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {1651intptr_t key = (intptr_t) adr_type;1652key ^= key >> logAliasCacheSize;1653return &_alias_cache[key & right_n_bits(logAliasCacheSize)];1654}165516561657//-----------------------------grow_alias_types--------------------------------1658void Compile::grow_alias_types() {1659const int old_ats = _max_alias_types; // how many before?1660const int new_ats = old_ats; // how many more?1661const int grow_ats = old_ats+new_ats; // how many now?1662_max_alias_types = grow_ats;1663_alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);1664AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);1665Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);1666for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];1667}166816691670//--------------------------------find_alias_type------------------------------1671Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {1672if (_AliasLevel == 0)1673return alias_type(AliasIdxBot);16741675AliasCacheEntry* ace = probe_alias_cache(adr_type);1676if (ace->_adr_type == adr_type) {1677return alias_type(ace->_index);1678}16791680// Handle special cases.1681if (adr_type == NULL) return alias_type(AliasIdxTop);1682if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);16831684// Do it the slow way.1685const TypePtr* flat = flatten_alias_type(adr_type);16861687#ifdef ASSERT1688{1689ResourceMark rm;1690assert(flat == flatten_alias_type(flat),1691err_msg("not idempotent: adr_type = %s; flat = %s => %s", Type::str(adr_type),1692Type::str(flat), 
                 Type::str(flatten_alias_type(flat))));
  assert(flat != TypePtr::BOTTOM,
         err_msg("cannot alias-analyze an untyped ptr: adr_type = %s", Type::str(adr_type)));
  if (flat->isa_oopptr() && !flat->isa_klassptr()) {
    const TypeOopPtr* foop = flat->is_oopptr();
    // Scalarizable allocations have exact klass always.
    bool exact = !foop->klass_is_exact() || foop->is_known_instance();
    const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
    assert(foop == flatten_alias_type(xoop),
           err_msg("exactness must not affect alias type: foop = %s; xoop = %s",
                   Type::str(foop), Type::str(xoop)));
  }
}
#endif

// Linear search of the existing alias classes for the flattened type.
int idx = AliasIdxTop;
for (int i = 0; i < num_alias_types(); i++) {
  if (alias_type(i)->adr_type() == flat) {
    idx = i;
    break;
  }
}

if (idx == AliasIdxTop) {
  if (no_create)  return NULL;
  // Grow the array if necessary.
  if (_num_alias_types == _max_alias_types)  grow_alias_types();
  // Add a new alias type.
  idx = _num_alias_types++;
  _alias_types[idx]->Init(idx, flat);
  // Certain well-known memory locations are never rewritten (read-only).
  if (flat == TypeInstPtr::KLASS)  alias_type(idx)->set_rewritable(false);
  if (flat == TypeAryPtr::RANGE)   alias_type(idx)->set_rewritable(false);
  if (flat->isa_instptr()) {
    if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
        && flat->is_instptr()->klass() == env()->Class_klass())
      alias_type(idx)->set_rewritable(false);
  }
  if (flat->isa_aryptr()) {
#ifdef ASSERT
    const int header_size_min  = arrayOopDesc::base_offset_in_bytes(T_BYTE);
    // (T_BYTE has the weakest alignment and size restrictions...)
    assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");
#endif
    if (flat->offset() == TypePtr::OffsetBot) {
      alias_type(idx)->set_element(flat->is_aryptr()->elem());
    }
  }
  if (flat->isa_klassptr()) {
    if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
      alias_type(idx)->set_rewritable(false);
    if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
      alias_type(idx)->set_rewritable(false);
    if (flat->offset() == in_bytes(Klass::access_flags_offset()))
      alias_type(idx)->set_rewritable(false);
    if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
      alias_type(idx)->set_rewritable(false);
  }
  // %%% (We would like to finalize JavaThread::threadObj_offset(),
  // but the base pointer type is not distinctive enough to identify
  // references into JavaThread.)

  // Check for final fields.
  const TypeInstPtr* tinst = flat->isa_instptr();
  if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
    ciField* field;
    if (tinst->const_oop() != NULL &&
        tinst->klass() == ciEnv::current()->Class_klass() &&
        tinst->offset() >= (tinst->klass()->as_instance_klass()->size_helper() * wordSize)) {
      // static field
      ciInstanceKlass* k = tinst->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
      field = k->get_field_by_offset(tinst->offset(), true);
    } else {
      ciInstanceKlass *k = tinst->klass()->as_instance_klass();
      field = k->get_field_by_offset(tinst->offset(), false);
    }
    assert(field == NULL ||
           original_field == NULL ||
           (field->holder() == original_field->holder() &&
            field->offset() == original_field->offset() &&
            field->is_static() == original_field->is_static()), "wrong field?");
    // Set field() and is_rewritable() attributes.
    if (field != NULL)  alias_type(idx)->set_field(field);
  }
}

// Fill the cache for next time.
ace->_adr_type = adr_type;
ace->_index    = idx;
assert(alias_type(adr_type) == alias_type(idx),  "type must be installed");

// Might as well try to fill the cache for the flattened version, too.
AliasCacheEntry* face = probe_alias_cache(flat);
if (face->_adr_type == NULL) {
  face->_adr_type = flat;
  face->_index    = idx;
  assert(alias_type(flat) == alias_type(idx), "flat type must work too");
}

return alias_type(idx);
}


// Alias class for a field access: builds the holder's oop type plus the
// field offset, then delegates to the main alias_type lookup.
Compile::AliasType* Compile::alias_type(ciField* field) {
  const TypeOopPtr* t;
  if (field->is_static())
    t = TypeInstPtr::make(field->holder()->java_mirror());
  else
    t = TypeOopPtr::make_from_klass_raw(field->holder());
  AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()), field);
  assert((field->is_final() || field->is_stable()) == !atp->is_rewritable(), "must get the rewritable bits correct");
  return atp;
}


//------------------------------have_alias_type--------------------------------
// True if an alias class already exists for this address type (never creates).
bool Compile::have_alias_type(const TypePtr* adr_type) {
  AliasCacheEntry* ace = probe_alias_cache(adr_type);
  if (ace->_adr_type == adr_type) {
    return true;
  }

  // Handle special cases.
  if (adr_type == NULL)             return true;
  if (adr_type == TypePtr::BOTTOM)  return true;

  return find_alias_type(adr_type, true, NULL) != NULL;
}

//-----------------------------must_alias--------------------------------------
// True if all values of the given address type are in the given alias category.
bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
  if (alias_idx == AliasIdxBot)         return true;  // the universal category
  if (adr_type == NULL)                 return true;  // NULL serves as TypePtr::TOP
  if (alias_idx == AliasIdxTop)         return false; // the empty category
  if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins

  // the only remaining possible overlap is identity
  int adr_idx = get_alias_index(adr_type);
  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
  assert(adr_idx == alias_idx ||
         (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
          && adr_type                       != TypeOopPtr::BOTTOM),
         "should not be testing for overlap with an unsafe pointer");
  return adr_idx == alias_idx;
}

//------------------------------can_alias--------------------------------------
// True if any values of the given
// address type are in the given alias category.
bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
  if (alias_idx == AliasIdxTop)         return false; // the empty category
  if (adr_type == NULL)                 return false; // NULL serves as TypePtr::TOP
  if (alias_idx == AliasIdxBot)         return true;  // the universal category
  if (adr_type->base() == Type::AnyPtr) return true;  // TypePtr::BOTTOM or its twins

  // the only remaining possible overlap is identity
  int adr_idx = get_alias_index(adr_type);
  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
  return adr_idx == alias_idx;
}



//---------------------------pop_warm_call-------------------------------------
// Remove and return the head of the warm-call work list (NULL when empty).
WarmCallInfo* Compile::pop_warm_call() {
  WarmCallInfo* wci = _warm_calls;
  if (wci != NULL)  _warm_calls = wci->remove_from(wci);
  return wci;
}

//----------------------------Inline_Warm--------------------------------------
// Promote queued warm call sites to hot (inlined) while node budget remains;
// returns the number of calls made hot.
int Compile::Inline_Warm() {
  // If there is room, try to inline some more warm call sites.
  // %%% Do a graph index compaction pass when we think we're out of space?
  if (!InlineWarmCalls)  return 0;

  int calls_made_hot = 0;
  // Budget is the node headroom below the inlining cutoff, capped per pass.
  int room_to_grow   = NodeCountInliningCutoff - unique();
  int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
  int amount_grown   = 0;
  WarmCallInfo* call;
  while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
    int est_size = (int)call->size();
    if (est_size > (room_to_grow - amount_grown)) {
      // This one won't fit anyway.  Get rid of it.
      call->make_cold();
      continue;
    }
    call->make_hot();
    calls_made_hot++;
    amount_grown   += est_size;
    amount_to_grow -= est_size;
  }

  // New inlining opens fresh optimization opportunities.
  if (calls_made_hot > 0)  set_major_progress();
  return calls_made_hot;
}

//----------------------------Finish_Warm--------------------------------------
// Demote any still-pending warm calls to cold once inlining is over.
void Compile::Finish_Warm() {
  if (!InlineWarmCalls)  return;
  if (failing())  return;
  if (warm_calls() == NULL)  return;

  // Clean up loose ends, if we are out of space for inlining.
  WarmCallInfo* call;
  while ((call = pop_warm_call()) != NULL) {
    call->make_cold();
  }
}

//---------------------cleanup_loop_predicates-----------------------
// Remove the opaque nodes that protect the predicates so that all unused
// checks and uncommon_traps will be eliminated from the ideal graph
void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
  if (predicate_count()==0) return;
  // Iterate from the back; replace_node shrinks the predicate list.
  for (int i = predicate_count(); i > 0; i--) {
    Node * n = predicate_opaque1_node(i-1);
    assert(n->Opcode() == Op_Opaque1, "must be");
    igvn.replace_node(n, n->in(1));
  }
  assert(predicate_count()==0, "should be clean!");
}

// Register a range-check-dependent CastII so it can be removed after
// loop optimizations are finished.
void Compile::add_range_check_cast(Node* n) {
  assert(n->isa_CastII()->has_range_check(), "CastII should have range check dependency");
  assert(!_range_check_casts->contains(n), "duplicate entry in range check casts");
  _range_check_casts->append(n);
}

// Remove all range check dependent CastIINodes.
void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {
  // Iterate from the back; replace_node shrinks the list.
  for (int i = range_check_cast_count(); i > 0; i--) {
    Node* cast = range_check_cast_node(i-1);
    assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");
    igvn.replace_node(cast, cast->in(1));
  }
  assert(range_check_cast_count() == 0, "should be empty");
}

// StringOpts and late inlining of string methods
// parse_time distinguishes the call made during parsing from the one made
// from inline_incrementally(); only the latter resets _late_inlines_pos.
void Compile::inline_string_calls(bool parse_time) {
  {
    // remove useless nodes to make the usage analysis simpler
    ResourceMark rm;
    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
  }

  {
    ResourceMark rm;
    print_method(PHASE_BEFORE_STRINGOPTS, 3);
    PhaseStringOpts pso(initial_gvn(), for_igvn());
    print_method(PHASE_AFTER_STRINGOPTS, 3);
  }

  // now inline anything that we skipped the first time around
  if (!parse_time) {
    _late_inlines_pos = _late_inlines.length();
  }

  while (_string_late_inlines.length() > 0) {
    CallGenerator* cg = _string_late_inlines.pop();
    cg->do_late_inline();
    if (failing())  return;
  }
  _string_late_inlines.trunc_to(0);
}

// Late inlining of boxing methods
void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
  if (_boxing_late_inlines.length() > 0) {
    assert(has_boxed_value(), "inconsistent");

    PhaseGVN* gvn = initial_gvn();
    set_inlining_incrementally(true);

    // Hand the IGVN's types/values back to the parse-time GVN so the late
    // inlines parse against up-to-date information.
    assert( igvn._worklist.size() == 0, "should be done with igvn" );
    for_igvn()->clear();
    gvn->replace_with(&igvn);

    _late_inlines_pos = _late_inlines.length();

    while (_boxing_late_inlines.length() > 0) {
      CallGenerator* cg = _boxing_late_inlines.pop();
      cg->do_late_inline();
      if (failing())  return;
    }
    _boxing_late_inlines.trunc_to(0);

    {
      ResourceMark rm;
      PhaseRemoveUseless pru(gvn, for_igvn());
    }

    // Rebuild IGVN from the refreshed GVN and clean up after the inlines.
    igvn = PhaseIterGVN(gvn);
    igvn.optimize();

    set_inlining_progress(false);
    set_inlining_incrementally(false);
  }
}

// One round of incremental inlining: inline queued late calls until the
// first one reports progress, compact the remaining queue, then rebuild
// the IGVN state for the caller to optimize.
void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
  assert(IncrementalInline, "incremental inlining should be on");
  PhaseGVN* gvn = initial_gvn();

  set_inlining_progress(false);
  for_igvn()->clear();
  gvn->replace_with(&igvn);

  int i = 0;

  for (; i <_late_inlines.length() && !inlining_progress(); i++) {
    CallGenerator* cg = _late_inlines.at(i);
    _late_inlines_pos = i+1;
    cg->do_late_inline();
    if (failing())  return;
  }
  // Shift the not-yet-processed entries down over the consumed ones.
  int j = 0;
  for (; i < _late_inlines.length(); i++, j++) {
    _late_inlines.at_put(j, _late_inlines.at(i));
  }
  _late_inlines.trunc_to(j);

  {
    ResourceMark rm;
    PhaseRemoveUseless pru(gvn, for_igvn());
  }

  igvn = PhaseIterGVN(gvn);
}

// Perform incremental inlining until bound on number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
  PhaseGVN* gvn = initial_gvn();

  set_inlining_incrementally(true);
  set_inlining_progress(true);
  uint low_live_nodes = 0;

  while(inlining_progress() && _late_inlines.length() > 0) {

    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
        // PhaseIdealLoop is expensive so we only try it once we are
        // out of live nodes and we only try it again if the previous
        // helped got the number of nodes down significantly
        PhaseIdealLoop ideal_loop( igvn, false, true );
        if (failing())  return;
        low_live_nodes = live_nodes();
        _major_progress = true;
      }

      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
        // Still over budget after cleanup: stop inlining.
        break;
      }
    }

    inline_incrementally_one(igvn);

    if (failing())  return;

    igvn.optimize();

    if (failing())  return;
  }

  assert( igvn._worklist.size() == 0, "should be done with igvn" );

  if (_string_late_inlines.length() > 0) {
    assert(has_stringbuilder(), "inconsistent");
    for_igvn()->clear();
    initial_gvn()->replace_with(&igvn);

    inline_string_calls(false);

    if (failing())  return;

    {
      ResourceMark rm;
      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
    }

    igvn = PhaseIterGVN(gvn);

    igvn.optimize();
  }

  set_inlining_incrementally(false);
}


// Remove edges from "root" to each SafePoint at a backward branch.
// They were inserted during parsing (see add_safepoint()) to make
// infinite loops without calls or exceptions visible to
// root, i.e.,
// useful.
void Compile::remove_root_to_sfpts_edges() {
  Node *r = root();
  if (r != NULL) {
    // SafePoints hang off root as precedence edges (indices >= req()).
    for (uint i = r->req(); i < r->len(); ++i) {
      Node *n = r->in(i);
      if (n != NULL && n->is_SafePoint()) {
        // rm_prec compacts the precedence edges, so step back to
        // re-examine the slot that just received a new edge.
        r->rm_prec(i);
        --i;
      }
    }
  }
}

//------------------------------Optimize---------------------------------------
// Given a graph, optimize it.
void Compile::Optimize() {
  TracePhase t1("optimizer", &_t_optimizer, true);

#ifndef PRODUCT
  if (env()->break_at_compile()) {
    BREAKPOINT;
  }

#endif

  ResourceMark rm;
  int          loop_opts_cnt;

  NOT_PRODUCT( verify_graph_edges(); )

  print_method(PHASE_AFTER_PARSING);

 {
  // Iterative Global Value Numbering, including ideal transforms
  // Initialize IterGVN with types and values from parse-time GVN
  PhaseIterGVN igvn(initial_gvn());
  {
    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
    igvn.optimize();
  }

  print_method(PHASE_ITER_GVN1, 2);

  if (failing())  return;

  {
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
    inline_incrementally(igvn);
  }

  print_method(PHASE_INCREMENTAL_INLINE, 2);

  if (failing())  return;

  if (eliminate_boxing()) {
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
    // Inline valueOf() methods now.
    inline_boxing_calls(igvn);

    if (AlwaysIncrementalInline) {
      inline_incrementally(igvn);
    }

    print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);

    if (failing())  return;
  }

  // Now that all inlining is over, cut edge from root to loop
  // safepoints
  remove_root_to_sfpts_edges();

  // Remove the speculative part of types and clean up the graph from
  // the extra CastPP nodes whose only purpose is to carry them.  Do
  // that early so that optimizations are not disrupted by the extra
  // CastPP nodes.
  remove_speculative_types(igvn);

  // No more new expensive nodes will be added to the list from here
  // so keep only the actual candidates for optimizations.
  cleanup_expensive_nodes(igvn);

  if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
    NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)
    // Compact node indices so dead-node gaps don't inflate table sizes.
    initial_gvn()->replace_with(&igvn);
    for_igvn()->clear();
    Unique_Node_List new_worklist(C->comp_arena());
    {
      ResourceMark rm;
      PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);
    }
    set_for_igvn(&new_worklist);
    igvn = PhaseIterGVN(initial_gvn());
    igvn.optimize();
  }

  // Perform escape analysis
  if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
    if (has_loops()) {
      // Cleanup graph (remove dead nodes).
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false, true );
      if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
      if (failing())  return;
    }
    ConnectionGraph::do_analysis(this, &igvn);

    if (failing())  return;

    // Optimize out fields loads from scalar replaceable allocations.
    igvn.optimize();
    print_method(PHASE_ITER_GVN_AFTER_EA, 2);

    if (failing())  return;

    if (congraph() != NULL && macro_count() > 0) {
      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
      PhaseMacroExpand mexp(igvn);
      mexp.eliminate_macro_nodes();
      igvn.set_delay_transform(false);

      igvn.optimize();
      print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);

      if (failing())  return;
    }
  }

  // Loop transforms on the ideal graph.  Range Check Elimination,
  // peeling, unrolling, etc.

  // Set loop opts counter
  loop_opts_cnt = num_loop_opts();
  if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
    {
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, true );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
      if (failing())  return;
    }
    // Loop opts pass if partial peeling occurred in previous pass
    if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t3("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
      if (failing())  return;
    }
    // Loop opts pass for loop-unrolling before CCP
    if(major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t4("idealLoop", &_t_idealLoop, true);
      PhaseIdealLoop ideal_loop( igvn, false );
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
    }
    if (!failing()) {
      // Verify that last round of loop opts produced a valid graph
      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
      PhaseIdealLoop::verify(igvn);
    }
  }
  if (failing())  return;

  // Conditional Constant Propagation;
  PhaseCCP ccp( &igvn );
  // assert(true, ...) is a deliberate debugger anchor, not a check.
  assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
  {
    TracePhase t2("ccp", &_t_ccp, true);
    ccp.do_transform();
  }
  print_method(PHASE_CPP1, 2);

  assert( true, "Break here to ccp.dump_old2new_map()");

  // Iterative Global Value Numbering, including ideal transforms
  {
    NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
    igvn = ccp;
    igvn.optimize();
  }

  print_method(PHASE_ITER_GVN2, 2);

  if (failing())  return;

  // Loop transforms on the ideal graph.  Range Check Elimination,
  // peeling, unrolling, etc.
  if(loop_opts_cnt > 0) {
    debug_only( int cnt = 0; );
    while(major_progress() && (loop_opts_cnt > 0)) {
      TracePhase t2("idealLoop", &_t_idealLoop, true);
      assert( cnt++ < 40, "infinite cycle in loop optimization" );
      PhaseIdealLoop ideal_loop( igvn, true);
      loop_opts_cnt--;
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
      if (failing())  return;
    }
  }

  {
    // Verify that all previous optimizations produced a valid graph
    // at least to this point, even if no loop optimizations were done.
    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
    PhaseIdealLoop::verify(igvn);
  }

  if (range_check_cast_count() > 0) {
    // No more loop optimizations. Remove all range check dependent CastIINodes.
    C->remove_range_check_casts(igvn);
    igvn.optimize();
  }

  {
    NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
    PhaseMacroExpand  mex(igvn);
    if (mex.expand_macro_nodes()) {
      assert(failing(), "must bail out w/ explicit message");
      return;
    }
  }

 } // (End scope of igvn; run destructor if necessary for asserts.)

  dump_inlining();
  // A method with only infinite loops has no edges entering loops from root
  {
    NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
    if (final_graph_reshaping()) {
      assert(failing(), "must bail out w/ explicit message");
      return;
    }
  }

  print_method(PHASE_OPTIMIZE_FINISHED, 2);
}


//------------------------------Code_Gen---------------------------------------
// Given a graph, generate code for it
void Compile::Code_Gen() {
  if (failing()) {
    return;
  }

  // Perform instruction selection.
// You might think we could reclaim Matcher
  // memory PDQ, but actually the Matcher is used in generating spill code.
  // Internals of the Matcher (including some VectorSets) must remain live
  // for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage
  // set a bit in reclaimed memory.

  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )

  // Matcher lives on the stack; _matcher points at it for the duration.
  Matcher matcher;
  _matcher = &matcher;
  {
    TracePhase t2("matcher", &_t_matcher, true);
    matcher.match();
  }
  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )

  // If you have too many nodes, or if matching has failed, bail out
  check_node_count(0, "out of nodes matching instructions");
  if (failing()) {
    return;
  }

  // Build a proper-looking CFG
  PhaseCFG cfg(node_arena(), root(), matcher);
  _cfg = &cfg;
  {
    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
    bool success = cfg.do_global_code_motion();
    if (!success) {
      return;
    }

    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
    NOT_PRODUCT( verify_graph_edges(); )
    debug_only( cfg.verify(); )
  }

  PhaseChaitin regalloc(unique(), cfg, matcher);
  _regalloc = &regalloc;
  {
    TracePhase t2("regalloc", &_t_registerAllocation, true);
    // Perform register allocation.  After Chaitin, use-def chains are
    // no longer accurate (at spill code) and so must be ignored.
    // Node->LRG->reg mappings are still accurate.
    _regalloc->Register_Allocate();

    // Bail out if the allocator builds too many nodes
    if (failing()) {
      return;
    }
  }

  // Prior to register allocation we kept empty basic blocks in case the
  // the allocator needed a place to spill.  After register allocation we
  // are not adding any new instructions.  If any basic block is empty, we
  // can now safely remove it.
  {
    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
    cfg.remove_empty_blocks();
    if (do_freq_based_layout()) {
      PhaseBlockLayout layout(cfg);
    } else {
      cfg.set_loop_alignment();
    }
    cfg.fixup_flow();
  }

  // Apply peephole optimizations
  if( OptoPeephole ) {
    NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
    PhasePeephole peep( _regalloc, cfg);
    peep.do_transform();
  }

  // Do late expand if CPU requires this.
  if (Matcher::require_postalloc_expand) {
    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
    cfg.postalloc_expand(_regalloc);
  }

  // Convert Nodes to instruction bits in a buffer
  {
    // %%%% workspace merge brought two timers together for one job
    TracePhase t2a("output", &_t_output, true);
    NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
    Output();
  }

  print_method(PHASE_FINAL_CODE);

  // He's dead, Jim.  Poison the stale pointers so accidental use traps.
  _cfg     = (PhaseCFG*)((intptr_t)0xdeadbeef);
  _regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
}


//------------------------------dump_asm---------------------------------------
// Dump formatted assembly
#ifndef PRODUCT
void Compile::dump_asm(int *pcs, uint pc_limit) {
  bool cut_short = false;
  tty->print_cr("#");
  tty->print("#  ");  _tf->dump();  tty->cr();
  tty->print_cr("#");

  // For all blocks
  int pc = 0x0;                 // Program counter
  char starts_bundle = ' ';
  _regalloc->dump_frame();

  Node *n = NULL;
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
    if (VMThread::should_terminate()) {
      // VM is shutting down; stop dumping and say so at the end.
      cut_short = true;
      break;
    }
    Block* block = _cfg->get_block(i);
    if (block->is_connector() && !Verbose) {
      continue;
    }
    n = block->head();
    // Print the pc of the block head when the pcs table covers it.
    if (pcs && n->_idx < pc_limit) {
      tty->print("%3.3x   ", pcs[n->_idx]);
    } else {
      tty->print("      ");
    }
    block->dump_head(_cfg);
    if (block->is_connector()) {
      tty->print_cr("        # Empty connector block");
    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
      tty->print_cr("        # Block is sole successor of call");
    }

    // For all instructions
    Node *delay = NULL;
    for (uint j = 0; j < block->number_of_nodes(); j++) {
      if (VMThread::should_terminate()) {
        cut_short = true;
        break;
      }
      n = block->get_node(j);
      if (valid_bundle_info(n)) {
        Bundle* bundle = node_bundling(n);
        if (bundle->used_in_unconditional_delay()) {
          // Defer printing: the delay-slot instruction prints after its branch.
          delay = n;
          continue;
        }
        if (bundle->starts_bundle()) {
          starts_bundle = '+';
        }
      }

      if (WizardMode) {
        n->dump();
      }

      if( !n->is_Region() &&    // Dont print in the Assembly
          !n->is_Phi() &&       // a few noisely useless nodes
          !n->is_Proj() &&
          !n->is_MachTemp() &&
          !n->is_SafePointScalarObject() &&
          !n->is_Catch() &&     // Would be nice to print exception table targets
          !n->is_MergeMem() &&  // Not very interesting
          !n->is_top() &&       // Debug info table constants
          !(n->is_Con() && !n->is_Mach())// Debug info table constants
          ) {
        if (pcs && n->_idx < pc_limit)
          tty->print("%3.3x", pcs[n->_idx]);
        else
          tty->print("   ");
        tty->print(" %c ", starts_bundle);
        starts_bundle = ' ';
        tty->print("\t");
        n->format(_regalloc, tty);
        tty->cr();
      }

      // If we have an instruction with a delay slot, and have seen a delay,
      // then back up and print it
      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
        assert(delay != NULL, "no unconditional delay instruction");
        if (WizardMode) delay->dump();

        if (node_bundling(delay)->starts_bundle())
          starts_bundle = '+';
        if (pcs && n->_idx < pc_limit)
          tty->print("%3.3x", pcs[n->_idx]);
        else
          tty->print("   ");
        tty->print(" %c ", starts_bundle);
        starts_bundle = ' ';
        tty->print("\t");
        delay->format(_regalloc, tty);
        tty->cr();
        delay = NULL;
      }

      // Dump the exception table as well
      if( n->is_Catch() && (Verbose || WizardMode) ) {
        // Print the exception table for this offset
        _handler_table.print_subtable_for(pc);
      }
    }

    if (pcs && n->_idx < pc_limit)
      tty->print_cr("%3.3x", pcs[n->_idx]);
    else
      tty->cr();

    assert(cut_short || delay == NULL, "no unconditional delay branch");

  } // End of per-block dump
  tty->cr();

  if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
}
#endif

//------------------------------Final_Reshape_Counts---------------------------
// This class defines counters to help identify when a method
// may/must be executed using hardware with only 24-bit precision.
struct Final_Reshape_Counts : public StackObj {
  int  _call_count;             // count non-inlined 'common' calls
  int  _float_count;            // count float ops requiring 24-bit precision
  int  _double_count;           // count double ops requiring more precision
  int  _java_call_count;        // count non-inlined 'java' calls
  int  _inner_loop_count;       // count loops which need alignment
  VectorSet _visited;           // Visitation flags
  Node_List _tests;             // Set of IfNodes & PCTableNodes

  Final_Reshape_Counts() :
    _call_count(0), _float_count(0), _double_count(0),
    _java_call_count(0), _inner_loop_count(0),
    _visited( Thread::current()->resource_area() ) { }

  void inc_call_count  () { _call_count  ++; }
  void inc_float_count () { _float_count ++; }
  void inc_double_count() { _double_count++; }
  void inc_java_call_count() { _java_call_count++; }
  void inc_inner_loop_count() { _inner_loop_count++; }

  int  get_call_count  () const { return _call_count  ; }
  int  get_float_count () const { return _float_count ; }
  int  get_double_count() const { return _double_count; }
  int  get_java_call_count() const
{ return _java_call_count; }
  int  get_inner_loop_count() const { return _inner_loop_count; }
};

#ifdef ASSERT
// Debug-only sanity check that an instance-pointer offset lands inside
// the object's field layout.
static bool oop_offset_is_sane(const TypeInstPtr* tp) {
  ciInstanceKlass *k = tp->klass()->as_instance_klass();
  // Make sure the offset goes inside the instance layout.
  return k->contains_field_offset(tp->offset());
  // Note that OffsetBot and OffsetTop are very negative.
}
#endif

// Eliminate trivially redundant StoreCMs and accumulate their
// precedence edges.
void Compile::eliminate_redundant_card_marks(Node* n) {
  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
  if (n->in(MemNode::Address)->outcnt() > 1) {
    // There are multiple users of the same address so it might be
    // possible to eliminate some of the StoreCMs
    Node* mem = n->in(MemNode::Memory);
    Node* adr = n->in(MemNode::Address);
    Node* val = n->in(MemNode::ValueIn);
    Node* prev = n;
    bool done = false;
    // Walk the chain of StoreCMs eliminating ones that match.  As
    // long as it's a chain of single users then the optimization is
    // safe.  Eliminating partially redundant StoreCMs would require
    // cloning copies down the other paths.
    while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
      if (adr == mem->in(MemNode::Address) &&
          val == mem->in(MemNode::ValueIn)) {
        // redundant StoreCM
        if (mem->req() > MemNode::OopStore) {
          // Hasn't been processed by this code yet.
          n->add_prec(mem->in(MemNode::OopStore));
        } else {
          // Already converted to precedence edge
          for (uint i = mem->req(); i < mem->len(); i++) {
            // Accumulate any precedence edges
            if (mem->in(i) != NULL) {
              n->add_prec(mem->in(i));
            }
          }
          // Everything above this point has been processed.
          done = true;
        }
        // Eliminate the previous StoreCM by splicing it out of the
        // memory chain; it must now be dead.
        prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
        assert(mem->outcnt() == 0, "should be dead");
        mem->disconnect_inputs(NULL, this);
      } else {
        prev = mem;
      }
      mem = prev->in(MemNode::Memory);
    }
  }
}

//------------------------------final_graph_reshaping_impl----------------------
// Implement items 1-5 from final_graph_reshaping below.
void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {

  if ( n->outcnt() == 0 ) return; // dead node
  uint nop = n->Opcode();

  // Check for 2-input instruction with "last use" on right input.
  // Swap to left input.
// Implements item (2).
  if( n->req() == 3 &&          // two-input instruction
      n->in(1)->outcnt() > 1 && // left use is NOT a last use
      (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
      n->in(2)->outcnt() == 1 &&// right use IS a last use
      !n->in(2)->is_Con() ) {   // right use is not a constant
    // Check for commutative opcode
    switch( nop ) {
    case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
    case Op_MaxI:  case Op_MinI:
    case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
    case Op_AndL:  case Op_XorL:  case Op_OrL:
    case Op_AndI:  case Op_XorI:  case Op_OrI: {
      // Move "last use" input to left by swapping inputs
      n->swap_edges(1, 2);
      break;
    }
    default:
      break;
    }
  }

#ifdef ASSERT
  if( n->is_Mem() ) {
    int alias_idx = get_alias_index(n->as_Mem()->adr_type());
    assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
            // oop will be recorded in oop map if load crosses safepoint
            n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
                             LoadNode::is_immutable_value(n->in(MemNode::Address))),
            "raw memory operations should have control edge");
  }
  if (n->is_MemBar()) {
    MemBarNode* mb = n->as_MemBar();
    if (mb->trailing_store() || mb->trailing_load_store()) {
      // A trailing membar must still be paired with its leading membar
      // and must be anchored on the memory op it fences.
      assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair");
      Node* mem = mb->in(MemBarNode::Precedent);
      assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) ||
             (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op");
    } else if (mb->leading()) {
      assert(mb->trailing_membar()->leading_membar() == mb, "bad membar pair");
    }
  }
#endif
  // Count FPU ops and common calls, implements item (3)
  switch( nop ) {
  // Count all float operations that may use FPU
  case Op_AddF:
  case Op_SubF:
  case Op_MulF:
  case Op_DivF:
  case Op_NegF:
  case Op_ModF:
  case Op_ConvI2F:
  case Op_ConF:
  case Op_CmpF:
  case Op_CmpF3:
  // case Op_ConvL2F: // longs are split into 32-bit halves
    frc.inc_float_count();
    break;

  case Op_ConvF2D:
  case Op_ConvD2F:
    frc.inc_float_count();
    frc.inc_double_count();
    break;

  // Count all double operations that may use FPU
  case Op_AddD:
  case Op_SubD:
  case Op_MulD:
  case Op_DivD:
  case Op_NegD:
  case Op_ModD:
  case Op_ConvI2D:
  case Op_ConvD2I:
  // case Op_ConvL2D: // handled by leaf call
  // case Op_ConvD2L: // handled by leaf call
  case Op_ConD:
  case Op_CmpD:
  case Op_CmpD3:
    frc.inc_double_count();
    break;
  case Op_Opaque1:              // Remove Opaque Nodes before matching
  case Op_Opaque2:              // Remove Opaque Nodes before matching
  case Op_Opaque3:
    n->subsume_by(n->in(1), this);
    break;
  case Op_CallStaticJava:
  case Op_CallJava:
  case Op_CallDynamicJava:
    frc.inc_java_call_count(); // Count java call site;
    // Intentional fall-through: Java calls are also counted as call sites.
  case Op_CallRuntime:
  case Op_CallLeaf:
  case Op_CallLeafNoFP: {
    assert( n->is_Call(), "" );
    CallNode *call = n->as_Call();
    // Count call sites where the FP mode bit would have to be flipped.
    // Do not count uncommon runtime calls:
    // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
    // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
    if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
      frc.inc_call_count();   // Count the call site
    } else {                  // See if uncommon argument is shared
      Node *n = call->in(TypeFunc::Parms);
      int nop = n->Opcode();
      // Clone shared simple arguments to uncommon calls, item (1).
      if( n->outcnt() > 1 &&
          !n->is_Proj() &&
          nop != Op_CreateEx &&
          nop != Op_CheckCastPP &&
          nop != Op_DecodeN &&
          nop != Op_DecodeNKlass &&
          !n->is_Mem() ) {
        Node *x = n->clone();
        call->set_req( TypeFunc::Parms, x );
      }
    }
    break;
  }

  case Op_StoreD:
  case Op_LoadD:
  case Op_LoadD_unaligned:
    frc.inc_double_count();
    goto handle_mem;
  case Op_StoreF:
  case Op_LoadF:
    frc.inc_float_count();
    goto handle_mem;

  case Op_StoreCM:
    {
      // Convert OopStore dependence into precedence edge
      Node* prec = n->in(MemNode::OopStore);
      n->del_req(MemNode::OopStore);
      n->add_prec(prec);
      eliminate_redundant_card_marks(n);
    }

    // fall through

  case Op_StoreB:
  case Op_StoreC:
  case Op_StorePConditional:
  case Op_StoreI:
  case Op_StoreL:
  case Op_StoreIConditional:
  case Op_StoreLConditional:
  case Op_CompareAndSwapI:
  case Op_CompareAndSwapL:
  case Op_CompareAndSwapP:
  case Op_CompareAndSwapN:
  case Op_GetAndAddI:
  case Op_GetAndAddL:
  case Op_GetAndSetI:
  case Op_GetAndSetL:
  case Op_GetAndSetP:
  case Op_GetAndSetN:
  case Op_StoreP:
  case Op_StoreN:
  case Op_StoreNKlass:
  case Op_LoadB:
  case Op_LoadUB:
  case Op_LoadUS:
  case Op_LoadI:
  case Op_LoadKlass:
  case Op_LoadNKlass:
  case Op_LoadL:
  case Op_LoadL_unaligned:
  case Op_LoadPLocked:
  case Op_LoadP:
  case Op_LoadN:
  case Op_LoadRange:
  case Op_LoadS: {
  handle_mem:
#ifdef ASSERT
    if( VerifyOptoOopOffsets ) {
      assert( n->is_Mem(), "" );
      MemNode *mem  = (MemNode*)n;
      // Check to see if address types have grounded out somehow.
      const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
      assert( !tp || oop_offset_is_sane(tp), "" );
    }
#endif
    break;
  }

  case Op_AddP: {               // Assert sane base pointers
    Node *addp = n->in(AddPNode::Address);
    assert( !addp->is_AddP() ||
            addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
            addp->in(AddPNode::Base) == n->in(AddPNode::Base),
            "Base pointers must match" );
#ifdef _LP64
    if ((UseCompressedOops || UseCompressedClassPointers) &&
        addp->Opcode() == Op_ConP &&
        addp == n->in(AddPNode::Base) &&
        n->in(AddPNode::Offset)->is_Con()) {
      // Use addressing with narrow klass to load with offset on x86.
      // On sparc loading 32-bits constant and decoding it have less
      // instructions (4) then load 64-bits constant (7).
      // Do this transformation here since IGVN will convert ConN back to ConP.
      const Type* t = addp->bottom_type();
      if (t->isa_oopptr() || t->isa_klassptr()) {
        Node* nn = NULL;

        int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass;

        // Look for existing ConN node of the same exact type.
        Node* r  = root();
        uint cnt = r->outcnt();
        for (uint i = 0; i < cnt; i++) {
          Node* m = r->raw_out(i);
          if (m!= NULL && m->Opcode() == op &&
              m->bottom_type()->make_ptr() == t) {
            nn = m;
            break;
          }
        }
        if (nn != NULL) {
          // Decode a narrow oop to match address
          // [R12 + narrow_oop_reg<<3 + offset]
          if (t->isa_oopptr()) {
            nn = new (this) DecodeNNode(nn, t);
          } else {
            nn = new (this) DecodeNKlassNode(nn, t);
          }
          n->set_req(AddPNode::Base, nn);
          n->set_req(AddPNode::Address, nn);
          if (addp->outcnt() == 0) {
            addp->disconnect_inputs(NULL, this);
          }
        }
      }
    }
#endif
    break;
  }

#ifdef _LP64
  case Op_CastPP:
    if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
      Node* in1 = n->in(1);
      const Type* t = n->bottom_type();
      Node* new_in1 = in1->clone();
      new_in1->as_DecodeN()->set_type(t);

      if (!Matcher::narrow_oop_use_complex_address()) {
        //
        // x86, ARM and friends can handle 2 adds in addressing mode
        // and Matcher can fold a DecodeN node into address by using
        // a narrow oop directly and do implicit NULL check in address:
        //
        // [R12 + narrow_oop_reg<<3 + offset]
        // NullCheck narrow_oop_reg
        //
        // On other platforms (Sparc) we have to keep new DecodeN node and
        // use it to do implicit NULL check in address:
        //
        // decode_not_null narrow_oop_reg, base_reg
        // [base_reg + offset]
        // NullCheck base_reg
        //
        // Pin the new DecodeN node to non-null path on these platform (Sparc)
        // to keep the information to which NULL check the new DecodeN node
        // corresponds to use it as value in implicit_null_check().
        //
        new_in1->set_req(0, n->in(0));
      }

      n->subsume_by(new_in1, this);
      if (in1->outcnt() == 0) {
        in1->disconnect_inputs(NULL, this);
      }
    }
    break;

  case Op_CmpP:
    // Do this transformation here to preserve CmpPNode::sub() and
    // other TypePtr related Ideal optimizations (for example, ptr nullness).
    if (n->in(1)->is_DecodeNarrowPtr() || n->in(2)->is_DecodeNarrowPtr()) {
      Node* in1 = n->in(1);
      Node* in2 = n->in(2);
      // Normalize so in1 is the DecodeNarrowPtr operand.
      if (!in1->is_DecodeNarrowPtr()) {
        in2 = in1;
        in1 = n->in(2);
      }
      assert(in1->is_DecodeNarrowPtr(), "sanity");

      Node* new_in2 = NULL;
      if (in2->is_DecodeNarrowPtr()) {
        assert(in2->Opcode() == in1->Opcode(), "must be same node type");
        new_in2 = in2->in(1);
      } else if (in2->Opcode() == Op_ConP) {
        const Type* t = in2->bottom_type();
        if (t == TypePtr::NULL_PTR) {
          assert(in1->is_DecodeN(), "compare klass to null?");
          // Don't convert CmpP null check into CmpN if compressed
          // oops implicit null check is not generated.
          // This will allow to generate normal oop implicit null check.
          if (Matcher::gen_narrow_oop_implicit_null_checks())
            new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);
          //
          // This transformation together with CastPP transformation above
          // will generated code for implicit NULL checks for compressed oops.
          //
          // The original code after Optimize()
          //
          //    LoadN memory, narrow_oop_reg
          //    decode narrow_oop_reg, base_reg
          //    CmpP base_reg, NULL
          //    CastPP base_reg // NotNull
          //    Load [base_reg + offset], val_reg
          //
          // after these transformations will be
          //
          //    LoadN memory, narrow_oop_reg
          //    CmpN narrow_oop_reg, NULL
          //    decode_not_null narrow_oop_reg, base_reg
          //    Load [base_reg + offset], val_reg
          //
          // and the uncommon path (== NULL) will use narrow_oop_reg directly
          // since narrow oops can be used in debug info now (see the code in
          // final_graph_reshaping_walk()).
          //
          // At the end the code will be matched to
          // on x86:
          //
          //    Load_narrow_oop memory, narrow_oop_reg
          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
          //    NullCheck narrow_oop_reg
          //
          // and on sparc:
          //
          //    Load_narrow_oop memory, narrow_oop_reg
          //    decode_not_null narrow_oop_reg, base_reg
          //    Load [base_reg + offset], val_reg
          //    NullCheck base_reg
          //
        } else if (t->isa_oopptr()) {
          new_in2 = ConNode::make(this, t->make_narrowoop());
        } else if (t->isa_klassptr()) {
          new_in2 = ConNode::make(this, t->make_narrowklass());
        }
      }
      if (new_in2 != NULL) {
        Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2);
        n->subsume_by(cmpN, this);
        if (in1->outcnt() == 0) {
          in1->disconnect_inputs(NULL, this);
        }
        if (in2->outcnt() == 0) {
          in2->disconnect_inputs(NULL, this);
        }
      }
    }
    break;

  case Op_DecodeN:
  case Op_DecodeNKlass:
    assert(!n->in(1)->is_EncodeNarrowPtr(), "should be optimized out");
    // DecodeN could be pinned when it can't be fold into
    // an address expression, see the code for Op_CastPP above.
    assert(n->in(0) == NULL || (UseCompressedOops && !Matcher::narrow_oop_use_complex_address()), "no control");
    break;

  case Op_EncodeP:
  case Op_EncodePKlass: {
    Node* in1 = n->in(1);
    if (in1->is_DecodeNarrowPtr()) {
      // Encode(Decode(x)) == x: bypass both nodes.
      n->subsume_by(in1->in(1), this);
    } else if (in1->Opcode() == Op_ConP) {
      // Fold encode-of-constant into a narrow constant.
      const Type* t = in1->bottom_type();
      if (t == TypePtr::NULL_PTR) {
        assert(t->isa_oopptr(), "null klass?");
        n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);
      } else if (t->isa_oopptr()) {
        n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);
      } else if (t->isa_klassptr()) {
        n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);
      }
    }
    if (in1->outcnt() == 0) {
      in1->disconnect_inputs(NULL, this);
    }
    break;
  }

  case Op_Proj: {
    if (OptimizeStringConcat)
{3038ProjNode* p = n->as_Proj();3039if (p->_is_io_use) {3040// Separate projections were used for the exception path which3041// are normally removed by a late inline. If it wasn't inlined3042// then they will hang around and should just be replaced with3043// the original one.3044Node* proj = NULL;3045// Replace with just one3046for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {3047Node *use = i.get();3048if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {3049proj = use;3050break;3051}3052}3053assert(proj != NULL || p->_con == TypeFunc::I_O, "io may be dropped at an infinite loop");3054if (proj != NULL) {3055p->subsume_by(proj, this);3056}3057}3058}3059break;3060}30613062case Op_Phi:3063if (n->as_Phi()->bottom_type()->isa_narrowoop() || n->as_Phi()->bottom_type()->isa_narrowklass()) {3064// The EncodeP optimization may create Phi with the same edges3065// for all paths. It is not handled well by Register Allocator.3066Node* unique_in = n->in(1);3067assert(unique_in != NULL, "");3068uint cnt = n->req();3069for (uint i = 2; i < cnt; i++) {3070Node* m = n->in(i);3071assert(m != NULL, "");3072if (unique_in != m)3073unique_in = NULL;3074}3075if (unique_in != NULL) {3076n->subsume_by(unique_in, this);3077}3078}3079break;30803081#endif30823083#ifdef ASSERT3084case Op_CastII:3085// Verify that all range check dependent CastII nodes were removed.3086if (n->isa_CastII()->has_range_check()) {3087n->dump(3);3088assert(false, "Range check dependent CastII node was not removed");3089}3090break;3091#endif30923093case Op_ModI:3094if (UseDivMod) {3095// Check if a%b and a/b both exist3096Node* d = n->find_similar(Op_DivI);3097if (d) {3098// Replace them with a fused divmod if supported3099if (Matcher::has_match_rule(Op_DivModI)) {3100DivModINode* divmod = DivModINode::make(this, n);3101d->subsume_by(divmod->div_proj(), this);3102n->subsume_by(divmod->mod_proj(), this);3103} else {3104// replace a%b with a-((a/b)*b)3105Node* mult = new (this) MulINode(d, 
d->in(2));3106Node* sub = new (this) SubINode(d->in(1), mult);3107n->subsume_by(sub, this);3108}3109}3110}3111break;31123113case Op_ModL:3114if (UseDivMod) {3115// Check if a%b and a/b both exist3116Node* d = n->find_similar(Op_DivL);3117if (d) {3118// Replace them with a fused divmod if supported3119if (Matcher::has_match_rule(Op_DivModL)) {3120DivModLNode* divmod = DivModLNode::make(this, n);3121d->subsume_by(divmod->div_proj(), this);3122n->subsume_by(divmod->mod_proj(), this);3123} else {3124// replace a%b with a-((a/b)*b)3125Node* mult = new (this) MulLNode(d, d->in(2));3126Node* sub = new (this) SubLNode(d->in(1), mult);3127n->subsume_by(sub, this);3128}3129}3130}3131break;31323133case Op_LoadVector:3134case Op_StoreVector:3135break;31363137case Op_PackB:3138case Op_PackS:3139case Op_PackI:3140case Op_PackF:3141case Op_PackL:3142case Op_PackD:3143if (n->req()-1 > 2) {3144// Replace many operand PackNodes with a binary tree for matching3145PackNode* p = (PackNode*) n;3146Node* btp = p->binary_tree_pack(this, 1, n->req());3147n->subsume_by(btp, this);3148}3149break;3150case Op_Loop:3151case Op_CountedLoop:3152if (n->as_Loop()->is_inner_loop()) {3153frc.inc_inner_loop_count();3154}3155break;3156case Op_LShiftI:3157case Op_RShiftI:3158case Op_URShiftI:3159case Op_LShiftL:3160case Op_RShiftL:3161case Op_URShiftL:3162if (Matcher::need_masked_shift_count) {3163// The cpu's shift instructions don't restrict the count to the3164// lower 5/6 bits. We need to do the masking ourselves.3165Node* in2 = n->in(2);3166juint mask = (n->bottom_type() == TypeInt::INT) ? 
(BitsPerInt - 1) : (BitsPerLong - 1);
        const TypeInt* t = in2->find_int_type();
        if (t != NULL && t->is_con()) {
          juint shift = t->get_con();
          if (shift > mask) { // Unsigned cmp
            // Constant count out of range: fold the masking at compile time.
            n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));
          }
        } else {
          if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
            // Variable count that may exceed the mask: AND it down explicitly.
            Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask)));
            n->set_req(2, shift);
          }
        }
        if (in2->outcnt() == 0) { // Remove dead node
          in2->disconnect_inputs(NULL, this);
        }
      }
      break;
    case Op_MemBarStoreStore:
    case Op_MemBarRelease:
      // Break the link with AllocateNode: it is no longer useful and
      // confuses register allocation.
      if (n->req() > MemBarNode::Precedent) {
        n->set_req(MemBarNode::Precedent, top());
      }
      break;
    default:
      assert( !n->is_Call(), "" );
      assert( !n->is_Mem(), "" );
      assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN");
      break;
  }

  // Collect CFG split points
  if (n->is_MultiBranch())
    frc._tests.push(n);
}

//------------------------------final_graph_reshaping_walk---------------------
// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
// requires that the walk visits a node's inputs before visiting the node.
void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
  ResourceArea *area = Thread::current()->resource_area();
  Unique_Node_List sfpt(area);

  // Iterative post-order DFS over the ideal graph: inputs are pushed onto
  // nstack and final_graph_reshaping_impl() runs only after all of a node's
  // inputs have been visited.
  frc._visited.set(root->_idx); // first, mark node as visited
  uint cnt = root->req();
  Node *n = root;
  uint  i = 0;
  while (true) {
    if (i < cnt) {
      // Place all non-visited non-null inputs onto stack
      Node* m = n->in(i);
      ++i;
      if (m != NULL && !frc._visited.test_set(m->_idx)) {
        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {
          // compute worst case interpreter size in case of a deoptimization
          update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());

          // Remember safepoints with debug info for the narrow-ptr pass below.
          sfpt.push(m);
        }
        cnt = m->req();
        nstack.push(n, i); // put on stack parent and next input's index
        n = m;
        i = 0;
      }
    } else {
      // Now do post-visit work
      final_graph_reshaping_impl( n, frc );
      if (nstack.is_empty())
        break;             // finished
      n = nstack.node();   // Get node from stack
      cnt = n->req();
      i = nstack.index();
      nstack.pop();        // Shift to the next node on stack
    }
  }

  // Skip next transformation if compressed oops are not used.
  if ((UseCompressedOops && !Matcher::gen_narrow_oop_implicit_null_checks()) ||
      (!UseCompressedOops && !UseCompressedClassPointers))
    return;

  // Go over safepoints nodes to skip DecodeN/DecodeNKlass nodes for debug edges.
  // It could be done for an uncommon traps or any safepoints/calls
  // if the DecodeN/DecodeNKlass node is referenced only in a debug info.
  while (sfpt.size() > 0) {
    n = sfpt.pop();
    JVMState *jvms = n->as_SafePoint()->jvms();
    assert(jvms != NULL, "sanity");
    int start = jvms->debug_start();
    int end   = n->req();
    bool is_uncommon = (n->is_CallStaticJava() &&
                        n->as_CallStaticJava()->uncommon_trap_request() != 0);
    for (int j = start; j < end; j++) {
      Node* in = n->in(j);
      if (in->is_DecodeNarrowPtr()) {
        bool safe_to_skip = true;
        if (!is_uncommon ) {
          // Is it safe to skip?
          for (uint i = 0; i < in->outcnt(); i++) {
            Node* u = in->raw_out(i);
            // NOTE: && binds tighter than ||, so this reads as: the use is
            // not a safepoint, OR it is a call with a non-debug use of n.
            if (!u->is_SafePoint() ||
                u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
              safe_to_skip = false;
            }
          }
        }
        if (safe_to_skip) {
          // Reference the narrow input of the Decode directly in debug info.
          n->set_req(j, in->in(1));
        }
        if (in->outcnt() == 0) {
          in->disconnect_inputs(NULL, this);
        }
      }
    }
  }
}

//------------------------------final_graph_reshaping--------------------------
// Final Graph Reshaping.
//
// (1) Clone simple inputs to uncommon calls, so they can be scheduled late
//     and
//     not commoned up and forced early.  Must come after regular
//     optimizations to avoid GVN undoing the cloning.  Clone constant
//     inputs to Loop Phis; these will be split by the allocator anyways.
//     Remove Opaque nodes.
// (2) Move last-uses by commutative operations to the left input to encourage
//     Intel update-in-place two-address operations and better register usage
//     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
//     calls canonicalizing them back.
// (3) Count the number of double-precision FP ops, single-precision FP ops
//     and call sites.  On Intel, we can get correct rounding either by
//     forcing singles to memory (requires extra stores and loads after each
//     FP bytecode) or we can set a rounding mode bit (requires setting and
//     clearing the mode bit around call sites).  The mode bit is only used
//     if the relative frequency of single FP ops to calls is low enough.
//     This is a key transform for SPEC mpeg_audio.
// (4) Detect infinite loops; blobs of code reachable from above but not
//     below.  Several of the Code_Gen algorithms fail on such code shapes,
//     so we simply bail out.  Happens a lot in ZKM.jar, but also happens
//     from time to time in other codes (such as -Xcomp finalizer loops, etc).
//     Detection is by looking for IfNodes where only 1 projection is
//     reachable from below or CatchNodes missing some targets.
// (5) Assert for insane oop offsets in debug mode.
//
// Returns true if compilation must be abandoned (bad control flow or an
// infinite loop was found); false if code generation may proceed.

bool Compile::final_graph_reshaping() {
  // an infinite loop may have been eliminated by the optimizer,
  // in which case the graph will be empty.
  if (root()->req() == 1) {
    record_method_not_compilable("trivial infinite loop");
    return true;
  }

  // Expensive nodes have their control input set to prevent the GVN
  // from freely commoning them. There's no GVN beyond this point so
  // no need to keep the control input. We want the expensive nodes to
  // be freely moved to the least frequent code path by gcm.
  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
  for (int i = 0; i < expensive_count(); i++) {
    _expensive_nodes->at(i)->set_req(0, NULL);
  }

  Final_Reshape_Counts frc;

  // Visit everybody reachable!
  // Allocate stack of size C->live_nodes()/2 to avoid frequent realloc
  Node_Stack nstack(live_nodes() >> 1);
  final_graph_reshaping_walk(nstack, root(), frc);

  // Check for unreachable (from below) code (i.e., infinite loops).
  for( uint i = 0; i < frc._tests.size(); i++ ) {
    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
    // Get number of CFG targets.
    // Note that PCTables include exception targets after calls.
    uint required_outcnt = n->required_outcnt();
    if (n->outcnt() != required_outcnt) {
      // Check for a few special cases.  Rethrow Nodes never take the
      // 'fall-thru' path, so expected kids is 1 less.
      if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
        if (n->in(0)->in(0)->is_Call()) {
          CallNode *call = n->in(0)->in(0)->as_Call();
          if (call->entry_point() == OptoRuntime::rethrow_stub()) {
            required_outcnt--;      // Rethrow always has 1 less kid
          } else if (call->req() > TypeFunc::Parms &&
                     call->is_CallDynamicJava()) {
            // Check for null receiver. In such case, the optimizer has
            // detected that the virtual call will always result in a null
            // pointer exception. The fall-through projection of this CatchNode
            // will not be populated.
            Node *arg0 = call->in(TypeFunc::Parms);
            if (arg0->is_Type() &&
                arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
              required_outcnt--;
            }
          } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
                     call->req() > TypeFunc::Parms+1 &&
                     call->is_CallStaticJava()) {
            // Check for negative array length. In such case, the optimizer has
            // detected that the allocation attempt will always result in an
            // exception. There is no fall-through projection of this CatchNode .
            Node *arg1 = call->in(TypeFunc::Parms+1);
            if (arg1->is_Type() &&
                arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
              required_outcnt--;
            }
          }
        }
      }
      // Recheck with a better notion of 'required_outcnt'
      if (n->outcnt() != required_outcnt) {
        record_method_not_compilable("malformed control flow");
        return true;            // Not all targets reachable!
      }
    }
    // Check that I actually visited all kids.  Unreached kids
    // must be infinite loops.
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
      if (!frc._visited.test(n->fast_out(j)->_idx)) {
        record_method_not_compilable("infinite loop");
        return true;            // Found unvisited kid; must be unreach
      }
  }

  // If original bytecodes contained a mixture of floats and doubles
  // check if the optimizer has made it homogenous, item (3).
  if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
      frc.get_float_count() > 32 &&
      frc.get_double_count() == 0 &&
      (10 * frc.get_call_count() < frc.get_float_count()) ) {
    set_24_bit_selection_and_mode( false,  true );
  }

  set_java_calls(frc.get_java_call_count());
  set_inner_loops(frc.get_inner_loop_count());

  // No infinite loops, no reason to bail out.
  return false;
}

//-----------------------------too_many_traps----------------------------------
// Report if there are too many traps at the current method and bci.
// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
bool Compile::too_many_traps(ciMethod* method,
                             int bci,
                             Deoptimization::DeoptReason reason) {
  ciMethodData* md = method->method_data();
  if (md->is_empty()) {
    // Assume the trap has not occurred, or that it occurred only
    // because of a transient condition during start-up in the interpreter.
    return
           false;
  }
  // For speculative traps the per-BCI record is keyed by this method.
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
  if (md->has_trap_at(bci, m, reason) != 0) {
    // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
    // Also, if there are multiple reasons, or if there is no per-BCI record,
    // assume the worst.
    if (log())
      log()->elem("observe trap='%s' count='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason));
    return true;
  } else {
    // Ignore method/bci and see if there have been too many globally.
    return too_many_traps(reason, md);
  }
}

// Less-accurate variant which does not require a method and bci.
// Returns true when the cumulative per-compilation trap count for 'reason'
// has reached the per-method limit.
bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
                             ciMethodData* logmd) {
  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
    // Too many traps globally.
    // Note that we use cumulative trap_count, not just md->trap_count.
    if (log()) {
      int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
      log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
                  Deoptimization::trap_reason_name(reason),
                  mcount, trap_count(reason));
    }
    return true;
  } else {
    // The coast is clear.
    return false;
  }
}

//--------------------------too_many_recompiles--------------------------------
// Report if there are too many recompiles at the current method and bci.
// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
// Is not eager to return true, since this will cause the compiler to use
// Action_none for a trap point, to avoid too many recompilations.
bool Compile::too_many_recompiles(ciMethod* method,
                                  int bci,
                                  Deoptimization::DeoptReason reason) {
  ciMethodData* md = method->method_data();
  if (md->is_empty()) {
    // Assume the trap has not occurred, or that it occurred only
    // because of a transient condition during start-up in the interpreter.
    return false;
  }
  // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
  uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
  uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
  Deoptimization::DeoptReason per_bc_reason
    = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
  if ((per_bc_reason == Deoptimization::Reason_none
       || md->has_trap_at(bci, m, reason) != 0)
      // The trap frequency measure we care about is the recompile count:
      && md->trap_recompiled_at(bci, m)
      && md->overflow_recompile_count() >= bc_cutoff) {
    // Do not emit a trap here if it has already caused recompilations.
    // Also, if there are multiple reasons, or if there is no per-BCI record,
    // assume the worst.
    if (log())
      log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason),
                  md->overflow_recompile_count());
    return true;
  } else if (trap_count(reason) != 0
             && decompile_count() >= m_cutoff) {
    // Too many recompiles globally, and we have seen this sort of trap.
    // Use cumulative decompile_count, not just md->decompile_count.
    if (log())
      log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason), trap_count(reason),
                  md->decompile_count(), decompile_count());
    return true;
  } else {
    // The coast is clear.
    return false;
  }
}

// Compute when not to trap.
// Used by matching trap based nodes and
// NullCheck optimization.
// Builds the _allowed_reasons bit mask: bit 'rs' is set when deopt reason
// 'rs' has not yet trapped too often for this compilation.
void Compile::set_allowed_deopt_reasons() {
  _allowed_reasons = 0;
  if (is_method_compilation()) {
    for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) {
      assert(rs < BitsPerInt, "recode bit map");
      if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
        _allowed_reasons |= nth_bit(rs);
      }
    }
  }
}

#ifndef PRODUCT
//------------------------------verify_graph_edges---------------------------
// Walk the Graph and verify that there is a one-to-one correspondence
// between Use-Def edges and Def-Use edges in the graph.
void Compile::verify_graph_edges(bool no_dead_code) {
  if (VerifyGraphEdges) {
    ResourceArea *area = Thread::current()->resource_area();
    Unique_Node_List visited(area);
    // Call recursive graph walk to check edges
    _root->verify_edges(visited);
    if (no_dead_code) {
      // Now make sure that no visited node is used by an unvisited node.
      bool dead_nodes = false;
      Unique_Node_List checked(area);
      while (visited.size() > 0) {
        Node* n = visited.pop();
        checked.push(n);
        for (uint i = 0; i < n->outcnt(); i++) {
          Node* use = n->raw_out(i);
          if (checked.member(use))  continue;  // already checked
          if (visited.member(use))  continue;  // already in the graph
          if (use->is_Con())        continue;  // a dead ConNode is OK
          // At this point, we have found a dead node which is DU-reachable.
          if (!dead_nodes) {
            tty->print_cr("*** Dead nodes reachable via DU edges:");
            dead_nodes = true;
          }
          use->dump(2);
          tty->print_cr("---");
          checked.push(use);  // No repeats; pretend it is now checked.
        }
      }
      assert(!dead_nodes, "using nodes must be reachable from root");
    }
  }
}

// Verify GC barriers consistency
// Currently supported:
// - G1 pre-barriers (see GraphKit::g1_write_barrier_pre())
void Compile::verify_barriers() {
  if (UseG1GC) {
    // Verify G1 pre-barriers
    const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active());

    ResourceArea *area = Thread::current()->resource_area();
    Unique_Node_List visited(area);
    Node_List worklist(area);
    // We're going to walk control flow backwards starting from the Root
    worklist.push(_root);
    while (worklist.size() > 0) {
      Node* x = worklist.pop();
      if (x == NULL || x == top()) continue;
      if (visited.member(x)) {
        continue;
      } else {
        visited.push(x);
      }

      if (x->is_Region()) {
        // Regions: follow all control predecessors.
        for (uint i = 1; i < x->req(); i++) {
          worklist.push(x->in(i));
        }
      } else {
        worklist.push(x->in(0));
        // We are looking for the pattern:
        //                            /->ThreadLocal
        // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
        //              \->ConI(0)
        // We want to verify that the If and the LoadB have the same control
        // See GraphKit::g1_write_barrier_pre()
        if (x->is_If()) {
          IfNode *iff = x->as_If();
          if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
            CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
            if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
                && cmp->in(1)->is_Load()) {
              LoadNode* load = cmp->in(1)->as_Load();
              if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
                  && load->in(2)->in(3)->is_Con()
                  && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {

                Node* if_ctrl = iff->in(0);
                Node* load_ctrl = load->in(0);

                if (if_ctrl != load_ctrl) {
                  // Skip possible CProj->NeverBranch in infinite loops
                  if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
                      && (if_ctrl->in(0)->is_MultiBranch() && if_ctrl->in(0)->Opcode() == Op_NeverBranch)) {
                    if_ctrl = if_ctrl->in(0)->in(0);
                  }
                }
                assert(load_ctrl != NULL && if_ctrl == load_ctrl, "controls must match");
              }
            }
          }
        }
      }
    }
  }
}

#endif
// The Compile object keeps track of failure reasons separately from the ciEnv.
// This is required because there is not quite a 1-1 relation between the
// ciEnv and its compilation task and the Compile object.  Note that one
// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
// to backtrack and retry without subsuming loads.  Other than this backtracking
// behavior, the Compile's failure reason is quietly copied up to the ciEnv
// by the logic in C2Compiler.
void Compile::record_failure(const char* reason) {
  if (log() != NULL) {
    log()->elem("failure reason='%s' phase='compile'", reason);
  }
  if (_failure_reason == NULL) {
    // Record the first failure reason.
    _failure_reason = reason;
  }

  if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
    C->print_method(PHASE_FAILURE);
  }
  _root = NULL;  // flush the graph, too
}

// Scoped phase timer: logs a <phase> element head on entry (when logging is
// enabled) and accumulates elapsed time via the TraceTime base class.
Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
    _phase_name(name), _dolog(dolog)
{
  if (dolog) {
    C = Compile::current();
    _log = C->log();
  } else {
    C = NULL;
    _log = NULL;
  }
  if (_log != NULL) {
    _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
    _log->stamp();
    _log->end_head();
  }
}

// Closes the <phase> element opened by the constructor; under ASSERT it can
// also report/verify live node counts for the phase.
Compile::TracePhase::~TracePhase() {

  C = Compile::current();
  if (_dolog) {
    _log = C->log();
  } else {
    _log = NULL;
  }

#ifdef ASSERT
  if (PrintIdealNodeCount) {
    tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'",
                  _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk());
  }

  if (VerifyIdealNodeCount) {
    Compile::current()->print_missing_nodes();
  }
#endif

  if (_log != NULL) {
    _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
  }
}

//=============================================================================
// Two Constant's are equal when the type and the value are equal.
bool Compile::Constant::operator==(const Constant& other) {
  if (type()          != other.type()         )  return false;
  if (can_be_reused() != other.can_be_reused())  return false;
  // For floating point values we compare the bit pattern.
  switch (type()) {
  case T_FLOAT:   return (_v._value.i == other._v._value.i);
  case T_LONG:
  case T_DOUBLE:  return (_v._value.j == other._v._value.j);
  case T_OBJECT:
  case T_ADDRESS: return (_v._value.l == other._v._value.l);
  case T_VOID:    return (_v._value.l == other._v._value.l);  // jump-table entries
  case T_METADATA: return (_v._metadata == other._v._metadata);
  default: ShouldNotReachHere();
  }
  return false;
}

// Size in bytes that a constant of BasicType 't' occupies in the constant table.
static int type_to_size_in_bytes(BasicType t) {
  switch (t) {
  case T_LONG:     return sizeof(jlong  );
  case T_FLOAT:    return sizeof(jfloat );
  case T_DOUBLE:   return sizeof(jdouble);
  case T_METADATA: return sizeof(Metadata*);
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
  case T_VOID:
  case T_ADDRESS:
  case T_OBJECT:   return sizeof(jobject);
  }

  ShouldNotReachHere();
  return -1;
}

// Comparator for sorting table entries by descending frequency.
int Compile::ConstantTable::qsort_comparator(Constant* a, Constant* b) {
  // sort descending
  if (a->freq() > b->freq())  return -1;
  if (a->freq() < b->freq())  return  1;
  return 0;
}

// Assigns a byte offset to every constant and computes the total table size.
void Compile::ConstantTable::calculate_offsets_and_size() {
  // First, sort the array by frequencies.
  _constants.sort(qsort_comparator);

#ifdef ASSERT
  // Make sure all jump-table entries were sorted to the end of the
  // array (they have a negative frequency).
  bool found_void = false;
  for (int i = 0; i < _constants.length(); i++) {
    Constant con = _constants.at(i);
    if (con.type() == T_VOID)
      found_void = true;  // jump-tables
    else
      assert(!found_void, "wrong sorting");
  }
#endif

  int offset = 0;
  for (int i = 0; i < _constants.length(); i++) {
    Constant* con = _constants.adr_at(i);

    // Align offset for type.
    int typesize = type_to_size_in_bytes(con->type());
    offset = align_size_up(offset, typesize);
    con->set_offset(offset);   // set constant's offset

    if (con->type() == T_VOID) {
      MachConstantNode* n = (MachConstantNode*) con->get_jobject();
      offset = offset + typesize * n->outcnt();  // expand jump-table
    } else {
      offset = offset + typesize;
    }
  }

  // Align size up to the next section start (which is insts; see
  // CodeBuffer::align_at_start).
  assert(_size == -1, "already set?");
  _size = align_size_up(offset, CodeEntryAlignment);
}

// Emits all constants into the consts section of 'cb', recording oop and
// metadata relocations; verifies each emitted address matches the offset
// computed by calculate_offsets_and_size().
void Compile::ConstantTable::emit(CodeBuffer& cb) {
  MacroAssembler _masm(&cb);
  for (int i = 0; i < _constants.length(); i++) {
    Constant con = _constants.at(i);
    address constant_addr = NULL;
    switch (con.type()) {
    case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
    case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
    case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
    case T_OBJECT: {
      jobject obj = con.get_jobject();
      int oop_index = _masm.oop_recorder()->find_index(obj);
      constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
      break;
    }
    case T_ADDRESS: {
      address addr = (address) con.get_jobject();
      constant_addr = _masm.address_constant(addr);
      break;
    }
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
    case T_VOID: {
      MachConstantNode* n = (MachConstantNode*) con.get_jobject();
      // Fill the jump-table with a dummy word.  The real value is
      // filled in later in fill_jump_table.
      address dummy = (address) n;
      constant_addr = _masm.address_constant(dummy);
      // Expand jump-table
      for (uint i = 1; i < n->outcnt(); i++) {
        address temp_addr = _masm.address_constant(dummy + i);
        assert(temp_addr, "consts section too small");
      }
      break;
    }
    case T_METADATA: {
      Metadata* obj = con.get_metadata();
      int metadata_index = _masm.oop_recorder()->find_index(obj);
      constant_addr = _masm.address_constant((address) obj, metadata_Relocation::spec(metadata_index));
      break;
    }
    default: ShouldNotReachHere();
    }
    assert(constant_addr, "consts section too small");
    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
           err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
  }
}

// Returns the byte offset of 'con' in the table; asserts that the constant
// is present and that offsets have already been assigned.
int Compile::ConstantTable::find_offset(Constant& con) const {
  int idx = _constants.find(con);
  assert(idx != -1, "constant must be in constant table");
  int offset = _constants.at(idx).offset();
  assert(offset != -1, "constant table not emitted yet?");
  return offset;
}

// Adds 'con' to the table; reusable duplicates just accumulate frequency.
void Compile::ConstantTable::add(Constant& con) {
  if (con.can_be_reused()) {
    int idx = _constants.find(con);
    if (idx != -1 && _constants.at(idx).can_be_reused()) {
      _constants.adr_at(idx)->inc_freq(con.freq());  // increase the frequency by the current value
      return;
    }
  }
  (void) _constants.append(con);
}

// Adds a typed constant for node 'n', weighted by its block's frequency.
Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
  Block* b = Compile::current()->cfg()->get_block_for_node(n);
  Constant con(type, value, b->_freq);
  add(con);
  return con;
}

// Adds a metadata constant.
Compile::Constant Compile::ConstantTable::add(Metadata* metadata) {
  Constant con(metadata);
  add(con);
  return con;
}

// Adds the constant held by machine operand 'oper', dispatching on its basic type.
Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, MachOper* oper) {
value;3852BasicType type = oper->type()->basic_type();3853switch (type) {3854case T_LONG: value.j = oper->constantL(); break;3855case T_FLOAT: value.f = oper->constantF(); break;3856case T_DOUBLE: value.d = oper->constantD(); break;3857case T_OBJECT:3858case T_ADDRESS: value.l = (jobject) oper->constant(); break;3859case T_METADATA: return add((Metadata*)oper->constant()); break;3860default: guarantee(false, err_msg_res("unhandled type: %s", type2name(type)));3861}3862return add(n, type, value);3863}38643865Compile::Constant Compile::ConstantTable::add_jump_table(MachConstantNode* n) {3866jvalue value;3867// We can use the node pointer here to identify the right jump-table3868// as this method is called from Compile::Fill_buffer right before3869// the MachNodes are emitted and the jump-table is filled (means the3870// MachNode pointers do not change anymore).3871value.l = (jobject) n;3872Constant con(T_VOID, value, next_jump_table_freq(), false); // Labels of a jump-table cannot be reused.3873add(con);3874return con;3875}38763877void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const {3878// If called from Compile::scratch_emit_size do nothing.3879if (Compile::current()->in_scratch_emit_size()) return;38803881assert(labels.is_nonempty(), "must be");3882assert((uint) labels.length() == n->outcnt(), err_msg_res("must be equal: %d == %d", labels.length(), n->outcnt()));38833884// Since MachConstantNode::constant_offset() also contains3885// table_base_offset() we need to subtract the table_base_offset()3886// to get the plain offset into the constant table.3887int offset = n->constant_offset() - table_base_offset();38883889MacroAssembler _masm(&cb);3890address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);38913892for (uint i = 0; i < n->outcnt(); i++) {3893address* constant_addr = &jump_table_base[i];3894assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table 
entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i)));3895*constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);3896cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);3897}3898}38993900void Compile::dump_inlining() {3901if (print_inlining() || print_intrinsics()) {3902// Print inlining message for candidates that we couldn't inline3903// for lack of space or non constant receiver3904for (int i = 0; i < _late_inlines.length(); i++) {3905CallGenerator* cg = _late_inlines.at(i);3906cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");3907}3908Unique_Node_List useful;3909useful.push(root());3910for (uint next = 0; next < useful.size(); ++next) {3911Node* n = useful.at(next);3912if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) {3913CallNode* call = n->as_Call();3914CallGenerator* cg = call->generator();3915cg->print_inlining_late("receiver not constant");3916}3917uint max = n->len();3918for ( uint i = 0; i < max; ++i ) {3919Node *m = n->in(i);3920if ( m == NULL ) continue;3921useful.push(m);3922}3923}3924for (int i = 0; i < _print_inlining_list->length(); i++) {3925tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());3926}3927}3928}39293930// Dump inlining replay data to the stream.3931// Don't change thread state and acquire any locks.3932void Compile::dump_inline_data(outputStream* out) {3933InlineTree* inl_tree = ilt();3934if (inl_tree != NULL) {3935out->print(" inline %d", inl_tree->count());3936inl_tree->dump_replay_data(out);3937}3938}39393940int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {3941if (n1->Opcode() < n2->Opcode()) return -1;3942else if (n1->Opcode() > n2->Opcode()) return 1;39433944assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));3945for 
(uint i = 1; i < n1->req(); i++) {3946if (n1->in(i) < n2->in(i)) return -1;3947else if (n1->in(i) > n2->in(i)) return 1;3948}39493950return 0;3951}39523953int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {3954Node* n1 = *n1p;3955Node* n2 = *n2p;39563957return cmp_expensive_nodes(n1, n2);3958}39593960void Compile::sort_expensive_nodes() {3961if (!expensive_nodes_sorted()) {3962_expensive_nodes->sort(cmp_expensive_nodes);3963}3964}39653966bool Compile::expensive_nodes_sorted() const {3967for (int i = 1; i < _expensive_nodes->length(); i++) {3968if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {3969return false;3970}3971}3972return true;3973}39743975bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {3976if (_expensive_nodes->length() == 0) {3977return false;3978}39793980assert(OptimizeExpensiveOps, "optimization off?");39813982// Take this opportunity to remove dead nodes from the list3983int j = 0;3984for (int i = 0; i < _expensive_nodes->length(); i++) {3985Node* n = _expensive_nodes->at(i);3986if (!n->is_unreachable(igvn)) {3987assert(n->is_expensive(), "should be expensive");3988_expensive_nodes->at_put(j, n);3989j++;3990}3991}3992_expensive_nodes->trunc_to(j);39933994// Then sort the list so that similar nodes are next to each other3995// and check for at least two nodes of identical kind with same data3996// inputs.3997sort_expensive_nodes();39983999for (int i = 0; i < _expensive_nodes->length()-1; i++) {4000if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {4001return true;4002}4003}40044005return false;4006}40074008void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {4009if (_expensive_nodes->length() == 0) {4010return;4011}40124013assert(OptimizeExpensiveOps, "optimization off?");40144015// Sort to bring similar nodes next to each other and clear the4016// control input of nodes for which there's only a single 
copy.4017sort_expensive_nodes();40184019int j = 0;4020int identical = 0;4021int i = 0;4022for (; i < _expensive_nodes->length()-1; i++) {4023assert(j <= i, "can't write beyond current index");4024if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {4025identical++;4026_expensive_nodes->at_put(j++, _expensive_nodes->at(i));4027continue;4028}4029if (identical > 0) {4030_expensive_nodes->at_put(j++, _expensive_nodes->at(i));4031identical = 0;4032} else {4033Node* n = _expensive_nodes->at(i);4034igvn.hash_delete(n);4035n->set_req(0, NULL);4036igvn.hash_insert(n);4037}4038}4039if (identical > 0) {4040_expensive_nodes->at_put(j++, _expensive_nodes->at(i));4041} else if (_expensive_nodes->length() >= 1) {4042Node* n = _expensive_nodes->at(i);4043igvn.hash_delete(n);4044n->set_req(0, NULL);4045igvn.hash_insert(n);4046}4047_expensive_nodes->trunc_to(j);4048}40494050void Compile::add_expensive_node(Node * n) {4051assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");4052assert(n->is_expensive(), "expensive nodes with non-null control here only");4053assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");4054if (OptimizeExpensiveOps) {4055_expensive_nodes->append(n);4056} else {4057// Clear control input and let IGVN optimize expensive nodes if4058// OptimizeExpensiveOps is off.4059n->set_req(0, NULL);4060}4061}40624063/**4064* Remove the speculative part of types and clean up the graph4065*/4066void Compile::remove_speculative_types(PhaseIterGVN &igvn) {4067if (UseTypeSpeculation) {4068Unique_Node_List worklist;4069worklist.push(root());4070int modified = 0;4071// Go over all type nodes that carry a speculative type, drop the4072// speculative part of the type and enqueue the node for an igvn4073// which may optimize it out.4074for (uint next = 0; next < worklist.size(); ++next) {4075Node *n = worklist.at(next);4076if (n->is_Type()) {4077TypeNode* tn = n->as_Type();4078const Type* t = tn->type();4079const Type* 
t_no_spec = t->remove_speculative();4080if (t_no_spec != t) {4081bool in_hash = igvn.hash_delete(n);4082assert(in_hash, "node should be in igvn hash table");4083tn->set_type(t_no_spec);4084igvn.hash_insert(n);4085igvn._worklist.push(n); // give it a chance to go away4086modified++;4087}4088}4089uint max = n->len();4090for( uint i = 0; i < max; ++i ) {4091Node *m = n->in(i);4092if (not_a_node(m)) continue;4093worklist.push(m);4094}4095}4096// Drop the speculative part of all types in the igvn's type table4097igvn.remove_speculative_types();4098if (modified > 0) {4099igvn.optimize();4100}4101#ifdef ASSERT4102// Verify that after the IGVN is over no speculative type has resurfaced4103worklist.clear();4104worklist.push(root());4105for (uint next = 0; next < worklist.size(); ++next) {4106Node *n = worklist.at(next);4107const Type* t = igvn.type_or_null(n);4108assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");4109if (n->is_Type()) {4110t = n->as_Type()->type();4111assert(t == t->remove_speculative(), "no more speculative types");4112}4113uint max = n->len();4114for( uint i = 0; i < max; ++i ) {4115Node *m = n->in(i);4116if (not_a_node(m)) continue;4117worklist.push(m);4118}4119}4120igvn.check_no_speculative_types();4121#endif4122}4123}41244125// Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)4126Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl) {4127if (ctrl != NULL) {4128// Express control dependency by a CastII node with a narrow type.4129value = new (phase->C) CastIINode(value, itype, false, true /* range check dependency */);4130// Make the CastII node dependent on the control input to prevent the narrowed ConvI2L4131// node from floating above the range check during loop optimizations. 
Otherwise, the4132// ConvI2L node may be eliminated independently of the range check, causing the data path4133// to become TOP while the control path is still there (although it's unreachable).4134value->set_req(0, ctrl);4135// Save CastII node to remove it after loop optimizations.4136phase->C->add_range_check_cast(value);4137value = phase->transform(value);4138}4139const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);4140return phase->transform(new (phase->C) ConvI2LNode(value, ltype));4141}41424143// Auxiliary method to support randomized stressing/fuzzing.4144//4145// This method can be called the arbitrary number of times, with current count4146// as the argument. The logic allows selecting a single candidate from the4147// running list of candidates as follows:4148// int count = 0;4149// Cand* selected = null;4150// while(cand = cand->next()) {4151// if (randomized_select(++count)) {4152// selected = cand;4153// }4154// }4155//4156// Including count equalizes the chances any candidate is "selected".4157// This is useful when we don't have the complete list of candidates to choose4158// from uniformly. In this case, we need to adjust the randomicity of the4159// selection, or else we will end up biasing the selection towards the latter4160// candidates.4161//4162// Quick back-envelope calculation shows that for the list of n candidates4163// the equal probability for the candidate to persist as "best" can be4164// achieved by replacing it with "next" k-th candidate with the probability4165// of 1/k. 
It can be easily shown that by the end of the run, the4166// probability for any candidate is converged to 1/n, thus giving the4167// uniform distribution among all the candidates.4168//4169// We don't care about the domain size as long as (RANDOMIZED_DOMAIN / count) is large.4170#define RANDOMIZED_DOMAIN_POW 294171#define RANDOMIZED_DOMAIN (1 << RANDOMIZED_DOMAIN_POW)4172#define RANDOMIZED_DOMAIN_MASK ((1 << (RANDOMIZED_DOMAIN_POW + 1)) - 1)4173bool Compile::randomized_select(int count) {4174assert(count > 0, "only positive");4175return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);4176}417741784179