Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/opto/compile.cpp
/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "ci/ciReplay.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/exceptionHandlerTable.hpp"
#include "code/nmethod.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp"
#include "jfr/jfrEvents.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/c2compiler.hpp"
#include "opto/callGenerator.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/divnode.hpp"
#include "opto/escape.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/matcher.hpp"
#include "opto/mathexactnode.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/node.hpp"
#include "opto/opcodes.hpp"
#include "opto/output.hpp"
#include "opto/parse.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/stringopts.hpp"
#include "opto/type.hpp"
#include "opto/vectornode.hpp"
#include "runtime/arguments.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timer.hpp"
#include "utilities/copy.hpp"
#if defined AD_MD_HPP
# include AD_MD_HPP
#elif defined TARGET_ARCH_MODEL_x86_32
# include "adfiles/ad_x86_32.hpp"
#elif defined TARGET_ARCH_MODEL_x86_64
# include "adfiles/ad_x86_64.hpp"
#elif defined TARGET_ARCH_MODEL_aarch64
# include "adfiles/ad_aarch64.hpp"
#elif defined TARGET_ARCH_MODEL_sparc
# include "adfiles/ad_sparc.hpp"
#elif defined TARGET_ARCH_MODEL_zero
# include "adfiles/ad_zero.hpp"
#elif defined TARGET_ARCH_MODEL_ppc_64
# include "adfiles/ad_ppc_64.hpp"
#endif

#if INCLUDE_ALL_GCS
#include "gc_implementation/shenandoah/shenandoahForwarding.hpp"
#include "gc_implementation/shenandoah/c2/shenandoahSupport.hpp"
#endif

// -------------------- Compile::mach_constant_base_node -----------------------
// Constant table base node singleton.
MachConstantBaseNode* Compile::mach_constant_base_node() {
  if (_mach_constant_base_node == NULL) {
    _mach_constant_base_node = new (C) MachConstantBaseNode();
    _mach_constant_base_node->add_req(C->root());
  }
  return _mach_constant_base_node;
}


/// Support for intrinsics.

// Return the index at which m must be inserted (or already exists).
// The sort order is by the address of the ciMethod, with is_virtual as minor key.
int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
#ifdef ASSERT
  for (int i = 1; i < _intrinsics->length(); i++) {
    CallGenerator* cg1 = _intrinsics->at(i-1);
    CallGenerator* cg2 = _intrinsics->at(i);
    assert(cg1->method() != cg2->method()
           ? cg1->method()     < cg2->method()
           : cg1->is_virtual() < cg2->is_virtual(),
           "compiler intrinsics list must stay sorted");
  }
#endif
  // Binary search sorted list, in decreasing intervals [lo, hi].
  int lo = 0, hi = _intrinsics->length()-1;
  while (lo <= hi) {
    int mid = (uint)(hi + lo) / 2;
    ciMethod* mid_m = _intrinsics->at(mid)->method();
    if (m < mid_m) {
      hi = mid-1;
    } else if (m > mid_m) {
      lo = mid+1;
    } else {
      // look at minor sort key
      bool mid_virt = _intrinsics->at(mid)->is_virtual();
      if (is_virtual < mid_virt) {
        hi = mid-1;
      } else if (is_virtual > mid_virt) {
        lo = mid+1;
      } else {
        return mid;  // exact match
      }
    }
  }
  return lo;  // inexact match
}

void Compile::register_intrinsic(CallGenerator* cg) {
  if (_intrinsics == NULL) {
    _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
  }
  // This code is stolen from ciObjectFactory::insert.
  // Really, GrowableArray should have methods for
  // insert_at, remove_at, and binary_search.
  int len = _intrinsics->length();
  int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
  if (index == len) {
    _intrinsics->append(cg);
  } else {
#ifdef ASSERT
    CallGenerator* oldcg = _intrinsics->at(index);
    assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
#endif
    _intrinsics->append(_intrinsics->at(len-1));
    int pos;
    for (pos = len-2; pos >= index; pos--) {
      _intrinsics->at_put(pos+1,_intrinsics->at(pos));
    }
    _intrinsics->at_put(index, cg);
  }
  assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
}

CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
  assert(m->is_loaded(), "don't try this on unloaded methods");
  if (_intrinsics != NULL) {
    int index = intrinsic_insertion_index(m, is_virtual);
    if (index < _intrinsics->length()
        && _intrinsics->at(index)->method() == m
        && _intrinsics->at(index)->is_virtual() == is_virtual) {
      return _intrinsics->at(index);
    }
  }
  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
  if (m->intrinsic_id() != vmIntrinsics::_none &&
      m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
    CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
    if (cg != NULL) {
      // Save it for next time:
      register_intrinsic(cg);
      return cg;
    } else {
      gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
    }
  }
  return NULL;
}

// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
// in library_call.cpp.


#ifndef PRODUCT
// statistics gathering...

juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};

bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
  assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
  int oflags = _intrinsic_hist_flags[id];
  assert(flags != 0, "what happened?");
  if (is_virtual) {
    flags |= _intrinsic_virtual;
  }
  bool changed = (flags != oflags);
  if ((flags & _intrinsic_worked) != 0) {
    juint count = (_intrinsic_hist_count[id] += 1);
    if (count == 1) {
      changed = true;           // first time
    }
    // increment the overall count also:
    _intrinsic_hist_count[vmIntrinsics::_none] += 1;
  }
  if (changed) {
    if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
      // Something changed about the intrinsic's virtuality.
      if ((flags & _intrinsic_virtual) != 0) {
        // This is the first use of this intrinsic as a virtual call.
        if (oflags != 0) {
          // We already saw it as a non-virtual, so note both cases.
          flags |= _intrinsic_both;
        }
      } else if ((oflags & _intrinsic_both) == 0) {
        // This is the first use of this intrinsic as a non-virtual
        flags |= _intrinsic_both;
      }
    }
    _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
  }
  // update the overall flags also:
  _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
  return changed;
}

static char* format_flags(int flags, char* buf) {
  buf[0] = 0;
  if ((flags & Compile::_intrinsic_worked) != 0)    strcat(buf, ",worked");
  if ((flags & Compile::_intrinsic_failed) != 0)    strcat(buf, ",failed");
  if ((flags & Compile::_intrinsic_disabled) != 0)  strcat(buf, ",disabled");
  if ((flags & Compile::_intrinsic_virtual) != 0)   strcat(buf, ",virtual");
  if ((flags & Compile::_intrinsic_both) != 0)      strcat(buf, ",nonvirtual");
  if (buf[0] == 0)  strcat(buf, ",");
  assert(buf[0] == ',', "must be");
  return &buf[1];
}

void Compile::print_intrinsic_statistics() {
  char flagsbuf[100];
  ttyLocker ttyl;
  if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
  tty->print_cr("Compiler intrinsic usage:");
  juint total = _intrinsic_hist_count[vmIntrinsics::_none];
  if (total == 0)  total = 1;  // avoid div0 in case of no successes
  #define PRINT_STAT_LINE(name, c, f) \
    tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
  for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
    vmIntrinsics::ID id = (vmIntrinsics::ID) index;
    int   flags = _intrinsic_hist_flags[id];
    juint count = _intrinsic_hist_count[id];
    if ((flags | count) != 0) {
      PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
    }
  }
  PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
  if (xtty != NULL)  xtty->tail("statistics");
}

void Compile::print_statistics() {
  { ttyLocker ttyl;
    if (xtty != NULL)  xtty->head("statistics type='opto'");
    Parse::print_statistics();
    PhaseCCP::print_statistics();
    PhaseRegAlloc::print_statistics();
    Scheduling::print_statistics();
    PhasePeephole::print_statistics();
    PhaseIdealLoop::print_statistics();
    if (xtty != NULL)  xtty->tail("statistics");
  }
  if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
    // put this under its own <statistics> element.
    print_intrinsic_statistics();
  }
}
#endif //PRODUCT

// Support for bundling info
Bundle* Compile::node_bundling(const Node *n) {
  assert(valid_bundle_info(n), "oob");
  return &_node_bundling_base[n->_idx];
}

bool Compile::valid_bundle_info(const Node *n) {
  return (_node_bundling_limit > n->_idx);
}


void Compile::gvn_replace_by(Node* n, Node* nn) {
  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
    Node* use = n->last_out(i);
    bool is_in_table = initial_gvn()->hash_delete(use);
    uint uses_found = 0;
    for (uint j = 0; j < use->len(); j++) {
      if (use->in(j) == n) {
        if (j < use->req())
          use->set_req(j, nn);
        else
          use->set_prec(j, nn);
        uses_found++;
      }
    }
    if (is_in_table) {
      // reinsert into table
      initial_gvn()->hash_find_insert(use);
    }
    record_for_igvn(use);
    i -= uses_found;    // we deleted 1 or more copies of this edge
  }
}


static inline bool not_a_node(const Node* n) {
  if (n == NULL)                   return true;
  if (((intptr_t)n & 1) != 0)      return true;  // uninitialized, etc.
  if (*(address*)n == badAddress)  return true;  // kill by Node::destruct
  return false;
}

// Identify all nodes that are reachable from below, useful.
// Use breadth-first pass that records state in a Unique_Node_List,
// recursive traversal is slower.
void Compile::identify_useful_nodes(Unique_Node_List &useful) {
  int estimated_worklist_size = live_nodes();
  useful.map( estimated_worklist_size, NULL );  // preallocate space

  // Initialize worklist
  if (root() != NULL)     { useful.push(root()); }
  // If 'top' is cached, declare it useful to preserve cached node
  if( cached_top_node() ) { useful.push(cached_top_node()); }

  // Push all useful nodes onto the list, breadthfirst
  for( uint next = 0; next < useful.size(); ++next ) {
    assert( next < unique(), "Unique useful nodes < total nodes");
    Node *n  = useful.at(next);
    uint max = n->len();
    for( uint i = 0; i < max; ++i ) {
      Node *m = n->in(i);
      if (not_a_node(m))  continue;
      useful.push(m);
    }
  }
}

// Update dead_node_list with any missing dead nodes using useful
// list. Consider all non-useful nodes to be useless i.e., dead nodes.
void Compile::update_dead_node_list(Unique_Node_List &useful) {
  uint max_idx = unique();
  VectorSet& useful_node_set = useful.member_set();

  for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
    // If node with index node_idx is not in useful set,
    // mark it as dead in dead node list.
    if (! useful_node_set.test(node_idx) ) {
      record_dead_node(node_idx);
    }
  }
}

void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
  int shift = 0;
  for (int i = 0; i < inlines->length(); i++) {
    CallGenerator* cg = inlines->at(i);
    CallNode* call = cg->call_node();
    if (shift > 0) {
      inlines->at_put(i-shift, cg);
    }
    if (!useful.member(call)) {
      shift++;
    }
  }
  inlines->trunc_to(inlines->length()-shift);
}

// Disconnect all useless nodes by disconnecting those at the boundary.
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
  uint next = 0;
  while (next < useful.size()) {
    Node *n = useful.at(next++);
    if (n->is_SafePoint()) {
      // We're done with a parsing phase. Replaced nodes are not valid
      // beyond that point.
      n->as_SafePoint()->delete_replaced_nodes();
    }
    // Use raw traversal of out edges since this code removes out edges
    int max = n->outcnt();
    for (int j = 0; j < max; ++j) {
      Node* child = n->raw_out(j);
      if (!useful.member(child)) {
        assert(!child->is_top() || child != top(),
               "If top is cached in Compile object it is in useful list");
        // Only need to remove this out-edge to the useless node
        n->raw_del_out(j);
        --j;
        --max;
      }
    }
    if (n->outcnt() == 1 && n->has_special_unique_user()) {
      record_for_igvn(n->unique_out());
    }
    if (n->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(n)) {
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        record_for_igvn(n->fast_out(i));
      }
    }
  }
  // Remove useless macro and predicate opaq nodes
  for (int i = C->macro_count()-1; i >= 0; i--) {
    Node* n = C->macro_node(i);
    if (!useful.member(n)) {
      remove_macro_node(n);
    }
  }
  // Remove useless CastII nodes with range check dependency
  for (int i = range_check_cast_count() - 1; i >= 0; i--) {
    Node* cast = range_check_cast_node(i);
    if (!useful.member(cast)) {
      remove_range_check_cast(cast);
    }
  }
  // Remove useless expensive node
  for (int i = C->expensive_count()-1; i >= 0; i--) {
    Node* n = C->expensive_node(i);
    if (!useful.member(n)) {
      remove_expensive_node(n);
    }
  }
  for (int i = C->shenandoah_barriers_count()-1; i >= 0; i--) {
    ShenandoahLoadReferenceBarrierNode* n = C->shenandoah_barrier(i);
    if (!useful.member(n)) {
      remove_shenandoah_barrier(n);
    }
  }
  // clean up the late inline lists
  remove_useless_late_inlines(&_string_late_inlines, useful);
  remove_useless_late_inlines(&_boxing_late_inlines, useful);
  remove_useless_late_inlines(&_late_inlines, useful);
  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
}

//------------------------------frame_size_in_words-----------------------------
// frame_slots in units of words
int Compile::frame_size_in_words() const {
  // shift is 0 in LP32 and 1 in LP64
  const int shift = (LogBytesPerWord - LogBytesPerInt);
  int words = _frame_slots >> shift;
  assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
  return words;
}

// To bang the stack of this compiled method we use the stack size
// that the interpreter would need in case of a deoptimization.
This464// removes the need to bang the stack in the deoptimization blob which465// in turn simplifies stack overflow handling.466int Compile::bang_size_in_bytes() const {467return MAX2(_interpreter_frame_size, frame_size_in_bytes());468}469470// ============================================================================471//------------------------------CompileWrapper---------------------------------472class CompileWrapper : public StackObj {473Compile *const _compile;474public:475CompileWrapper(Compile* compile);476477~CompileWrapper();478};479480CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {481// the Compile* pointer is stored in the current ciEnv:482ciEnv* env = compile->env();483assert(env == ciEnv::current(), "must already be a ciEnv active");484assert(env->compiler_data() == NULL, "compile already active?");485env->set_compiler_data(compile);486assert(compile == Compile::current(), "sanity");487488compile->set_type_dict(NULL);489compile->set_type_hwm(NULL);490compile->set_type_last_size(0);491compile->set_last_tf(NULL, NULL);492compile->set_indexSet_arena(NULL);493compile->set_indexSet_free_block_list(NULL);494compile->init_type_arena();495Type::Initialize(compile);496_compile->set_scratch_buffer_blob(NULL);497_compile->begin_method();498}499CompileWrapper::~CompileWrapper() {500_compile->end_method();501if (_compile->scratch_buffer_blob() != NULL)502BufferBlob::free(_compile->scratch_buffer_blob());503_compile->env()->set_compiler_data(NULL);504}505506507//----------------------------print_compile_messages---------------------------508void Compile::print_compile_messages() {509#ifndef PRODUCT510// Check if recompiling511if (_subsume_loads == false && PrintOpto) {512// Recompiling without allowing machine instructions to subsume loads513tty->print_cr("*********************************************************");514tty->print_cr("** Bailout: Recompile without subsuming loads **");515tty->print_cr("*********************************************************");516}517if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {518// Recompiling without escape analysis519tty->print_cr("*********************************************************");520tty->print_cr("** Bailout: Recompile without escape analysis **");521tty->print_cr("*********************************************************");522}523if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {524// Recompiling without boxing elimination525tty->print_cr("*********************************************************");526tty->print_cr("** Bailout: Recompile without boxing elimination **");527tty->print_cr("*********************************************************");528}529if (env()->break_at_compile()) {530// Open the debugger when compiling this method.531tty->print("### Breaking when compiling: ");532method()->print_short_name();533tty->cr();534BREAKPOINT;535}536537if( PrintOpto ) {538if (is_osr_compilation()) {539tty->print("[OSR]%3d", _compile_id);540} else {541tty->print("%3d", _compile_id);542}543}544#endif545}546547548//-----------------------init_scratch_buffer_blob------------------------------549// Construct a temporary BufferBlob and cache it for this compile.550void Compile::init_scratch_buffer_blob(int const_size) {551// If there is already a scratch buffer blob allocated and the552// constant section is big enough, use it. 
Otherwise free the553// current and allocate a new one.554BufferBlob* blob = scratch_buffer_blob();555if ((blob != NULL) && (const_size <= _scratch_const_size)) {556// Use the current blob.557} else {558if (blob != NULL) {559BufferBlob::free(blob);560}561562ResourceMark rm;563_scratch_const_size = const_size;564int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);565blob = BufferBlob::create("Compile::scratch_buffer", size);566// Record the buffer blob for next time.567set_scratch_buffer_blob(blob);568// Have we run out of code space?569if (scratch_buffer_blob() == NULL) {570// Let CompilerBroker disable further compilations.571record_failure("Not enough space for scratch buffer in CodeCache");572return;573}574}575576// Initialize the relocation buffers577relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;578set_scratch_locs_memory(locs_buf);579}580581582//-----------------------scratch_emit_size-------------------------------------583// Helper function that computes size by emitting code584uint Compile::scratch_emit_size(const Node* n) {585// Start scratch_emit_size section.586set_in_scratch_emit_size(true);587588// Emit into a trash buffer and count bytes emitted.589// This is a pretty expensive way to compute a size,590// but it works well enough if seldom used.591// All common fixed-size instructions are given a size592// method by the AD file.593// Note that the scratch buffer blob and locs memory are594// allocated at the beginning of the compile task, and595// may be shared by several calls to scratch_emit_size.596// The allocation of the scratch buffer blob is particularly597// expensive, since it has to grab the code cache lock.598BufferBlob* blob = this->scratch_buffer_blob();599assert(blob != NULL, "Initialize BufferBlob at start");600assert(blob->size() > MAX_inst_size, "sanity");601relocInfo* locs_buf = scratch_locs_memory();602address blob_begin = blob->content_begin();603address blob_end = (address)locs_buf;604assert(blob->content_contains(blob_end), "sanity");605CodeBuffer buf(blob_begin, blob_end - blob_begin);606buf.initialize_consts_size(_scratch_const_size);607buf.initialize_stubs_size(MAX_stubs_size);608assert(locs_buf != NULL, "sanity");609int lsize = MAX_locs_size / 3;610buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);611buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);612buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);613614// Do the emission.615616Label fakeL; // Fake label for branch instructions.617Label* saveL = NULL;618uint save_bnum = 0;619bool is_branch = n->is_MachBranch();620if (is_branch) {621MacroAssembler masm(&buf);622masm.bind(fakeL);623n->as_MachBranch()->save_label(&saveL, &save_bnum);624n->as_MachBranch()->label_set(&fakeL, 0);625}626n->emit(buf, this->regalloc());627628// Emitting into the scratch buffer should not fail629assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));630631if (is_branch) // Restore label.632n->as_MachBranch()->label_set(saveL, save_bnum);633634// End scratch_emit_size section.635set_in_scratch_emit_size(false);636637return buf.insts_size();638}639640641// ============================================================================642//------------------------------Compile standard-------------------------------643debug_only( int Compile::_debug_idx = 100000; )644645// Compile a method. 
entry_bci is -1 for normal compilations and indicates646// the continuation bci for on stack replacement.647648649Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,650bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )651: Phase(Compiler),652_env(ci_env),653_log(ci_env->log()),654_compile_id(ci_env->compile_id()),655_save_argument_registers(false),656_stub_name(NULL),657_stub_function(NULL),658_stub_entry_point(NULL),659_method(target),660_entry_bci(osr_bci),661_initial_gvn(NULL),662_for_igvn(NULL),663_warm_calls(NULL),664_subsume_loads(subsume_loads),665_do_escape_analysis(do_escape_analysis),666_eliminate_boxing(eliminate_boxing),667_failure_reason(NULL),668_code_buffer("Compile::Fill_buffer"),669_orig_pc_slot(0),670_orig_pc_slot_offset_in_bytes(0),671_has_method_handle_invokes(false),672_mach_constant_base_node(NULL),673_node_bundling_limit(0),674_node_bundling_base(NULL),675_java_calls(0),676_inner_loops(0),677_scratch_const_size(-1),678_in_scratch_emit_size(false),679_dead_node_list(comp_arena()),680_dead_node_count(0),681#ifndef PRODUCT682_trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),683_in_dump_cnt(0),684_printer(IdealGraphPrinter::printer()),685#endif686_congraph(NULL),687_comp_arena(mtCompiler),688_node_arena(mtCompiler),689_old_arena(mtCompiler),690_Compile_types(mtCompiler),691_replay_inline_data(NULL),692_late_inlines(comp_arena(), 2, 0, NULL),693_string_late_inlines(comp_arena(), 2, 0, NULL),694_boxing_late_inlines(comp_arena(), 2, 0, NULL),695_late_inlines_pos(0),696_number_of_mh_late_inlines(0),697_inlining_progress(false),698_inlining_incrementally(false),699_print_inlining_list(NULL),700_print_inlining_idx(0),701_interpreter_frame_size(0),702_max_node_limit(MaxNodeLimit) {703C = this;704705CompileWrapper cw(this);706#ifndef PRODUCT707if (TimeCompiler2) {708tty->print(" ");709target->holder()->name()->print();710tty->print(".");711target->print_short_name();712tty->print(" ");713}714TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);715TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);716bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");717if (!print_opto_assembly) {718bool print_assembly = (PrintAssembly || _method->should_print_assembly());719if (print_assembly && !Disassembler::can_decode()) {720tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");721print_opto_assembly = true;722}723}724set_print_assembly(print_opto_assembly);725set_parsed_irreducible_loop(false);726727if (method()->has_option("ReplayInline")) {728_replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());729}730#endif731set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));732set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));733set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it734735if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {736// Make sure the method being compiled gets its own MDO,737// so we can at least track the decompile_count().738// Need MDO to record RTM code generation state.739method()->ensure_method_data();740}741742Init(::AliasLevel);743744745print_compile_messages();746747_ilt = InlineTree::build_inline_tree_root();748749// Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice750assert(num_alias_types() >= 
AliasIdxRaw, "");751752#define MINIMUM_NODE_HASH 1023753// Node list that Iterative GVN will start with754Unique_Node_List for_igvn(comp_arena());755set_for_igvn(&for_igvn);756757// GVN that will be run immediately on new nodes758uint estimated_size = method()->code_size()*4+64;759estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);760PhaseGVN gvn(node_arena(), estimated_size);761set_initial_gvn(&gvn);762763if (print_inlining() || print_intrinsics()) {764_print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());765}766{ // Scope for timing the parser767TracePhase t3("parse", &_t_parser, true);768769// Put top into the hash table ASAP.770initial_gvn()->transform_no_reclaim(top());771772// Set up tf(), start(), and find a CallGenerator.773CallGenerator* cg = NULL;774if (is_osr_compilation()) {775const TypeTuple *domain = StartOSRNode::osr_domain();776const TypeTuple *range = TypeTuple::make_range(method()->signature());777init_tf(TypeFunc::make(domain, range));778StartNode* s = new (this) StartOSRNode(root(), domain);779initial_gvn()->set_type_bottom(s);780init_start(s);781cg = CallGenerator::for_osr(method(), entry_bci());782} else {783// Normal case.784init_tf(TypeFunc::make(method()));785StartNode* s = new (this) StartNode(root(), tf()->domain());786initial_gvn()->set_type_bottom(s);787init_start(s);788if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && (UseG1GC || UseShenandoahGC)) {789// With java.lang.ref.reference.get() we must go through the790// intrinsic when G1 is enabled - even when get() is the root791// method of the compile - so that, if necessary, the value in792// the referent field of the reference object gets recorded by793// the pre-barrier code.794// Specifically, if G1 is enabled, the value in the referent795// field is recorded by the G1 SATB pre barrier. 
This will796// result in the referent being marked live and the reference797// object removed from the list of discovered references during798// reference processing.799cg = find_intrinsic(method(), false);800}801if (cg == NULL) {802float past_uses = method()->interpreter_invocation_count();803float expected_uses = past_uses;804cg = CallGenerator::for_inline(method(), expected_uses);805}806}807if (failing()) return;808if (cg == NULL) {809record_method_not_compilable_all_tiers("cannot parse method");810return;811}812JVMState* jvms = build_start_state(start(), tf());813if ((jvms = cg->generate(jvms)) == NULL) {814if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {815record_method_not_compilable("method parse failed");816}817return;818}819GraphKit kit(jvms);820821if (!kit.stopped()) {822// Accept return values, and transfer control we know not where.823// This is done by a special, unique ReturnNode bound to root.824return_values(kit.jvms());825}826827if (kit.has_exceptions()) {828// Any exceptions that escape from this call must be rethrown829// to whatever caller is dynamically above us on the stack.830// This is done by a special, unique RethrowNode bound to root.831rethrow_exceptions(kit.transfer_exceptions_into_jvms());832}833834assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");835836if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {837inline_string_calls(true);838}839840if (failing()) return;841842print_method(PHASE_BEFORE_REMOVEUSELESS, 3);843844// Remove clutter produced by parsing.845if (!failing()) {846ResourceMark rm;847PhaseRemoveUseless pru(initial_gvn(), &for_igvn);848}849}850851// Note: Large methods are capped off in do_one_bytecode().852if (failing()) return;853854// After parsing, node notes are no longer automagic.855// They must be propagated by register_new_node_with_optimizer(),856// clone(), or the like.857set_default_node_notes(NULL);858859for (;;) {860int successes = Inline_Warm();861if (failing()) return;862if (successes == 0) break;863}864865// Drain the list.866Finish_Warm();867#ifndef PRODUCT868if (_printer) {869_printer->print_inlining(this);870}871#endif872873if (failing()) return;874NOT_PRODUCT( verify_graph_edges(); )875876// Now optimize877Optimize();878if (failing()) return;879NOT_PRODUCT( verify_graph_edges(); )880881#ifndef PRODUCT882if (PrintIdeal) {883ttyLocker ttyl; // keep the following output all in one block884// This output goes directly to the tty, not the compiler log.885// To enable tools to match it up with the compilation activity,886// be sure to tag this tty output with the compile ID.887if (xtty != NULL) {888xtty->head("ideal compile_id='%d'%s", compile_id(),889is_osr_compilation() ? " compile_kind='osr'" :890"");891}892root()->dump(9999);893if (xtty != NULL) {894xtty->tail("ideal");895}896}897#endif898899NOT_PRODUCT( verify_barriers(); )900901// Dump compilation data to replay it.902if (method()->has_option("DumpReplay")) {903env()->dump_replay_data(_compile_id);904}905if (method()->has_option("DumpInline") && (ilt() != NULL)) {906env()->dump_inline_data(_compile_id);907}908909// Now that we know the size of all the monitors we can add a fixed slot910// for the original deopt pc.911912_orig_pc_slot = fixed_slots();913int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);914set_fixed_slots(next_slot);915916// Compute when to use implicit null checks. 
Used by matching trap based917// nodes and NullCheck optimization.918set_allowed_deopt_reasons();919920// Now generate code921Code_Gen();922if (failing()) return;923924// Check if we want to skip execution of all compiled code.925{926#ifndef PRODUCT927if (OptoNoExecute) {928record_method_not_compilable("+OptoNoExecute"); // Flag as failed929return;930}931TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);932#endif933934if (is_osr_compilation()) {935_code_offsets.set_value(CodeOffsets::Verified_Entry, 0);936_code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);937} else {938_code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);939_code_offsets.set_value(CodeOffsets::OSR_Entry, 0);940}941942env()->register_method(_method, _entry_bci,943&_code_offsets,944_orig_pc_slot_offset_in_bytes,945code_buffer(),946frame_size_in_words(), _oop_map_set,947&_handler_table, &_inc_table,948compiler,949env()->comp_level(),950has_unsafe_access(),951SharedRuntime::is_wide_vector(max_vector_size()),952rtm_state()953);954955if (log() != NULL) // Print code cache state into compiler log956log()->code_cache_state();957}958}959960//------------------------------Compile----------------------------------------961// Compile a runtime stub962Compile::Compile( ciEnv* ci_env,963TypeFunc_generator generator,964address stub_function,965const char *stub_name,966int is_fancy_jump,967bool pass_tls,968bool save_arg_registers,969bool return_pc )970: Phase(Compiler),971_env(ci_env),972_log(ci_env->log()),973_compile_id(0),974_save_argument_registers(save_arg_registers),975_method(NULL),976_stub_name(stub_name),977_stub_function(stub_function),978_stub_entry_point(NULL),979_entry_bci(InvocationEntryBci),980_initial_gvn(NULL),981_for_igvn(NULL),982_warm_calls(NULL),983_orig_pc_slot(0),984_orig_pc_slot_offset_in_bytes(0),985_subsume_loads(true),986_do_escape_analysis(false),987_eliminate_boxing(false),988_failure_reason(NULL),989_code_buffer("Compile::Fill_buffer"),990_has_method_handle_invokes(false),991_mach_constant_base_node(NULL),992_node_bundling_limit(0),993_node_bundling_base(NULL),994_java_calls(0),995_inner_loops(0),996#ifndef PRODUCT997_trace_opto_output(TraceOptoOutput),998_in_dump_cnt(0),999_printer(NULL),1000#endif1001_comp_arena(mtCompiler),1002_node_arena(mtCompiler),1003_old_arena(mtCompiler),1004_Compile_types(mtCompiler),1005_dead_node_list(comp_arena()),1006_dead_node_count(0),1007_congraph(NULL),1008_replay_inline_data(NULL),1009_number_of_mh_late_inlines(0),1010_inlining_progress(false),1011_inlining_incrementally(false),1012_print_inlining_list(NULL),1013_print_inlining_idx(0),1014_allowed_reasons(0),1015_interpreter_frame_size(0),1016_max_node_limit(MaxNodeLimit) {1017C = this;10181019#ifndef PRODUCT1020TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);1021TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);1022set_print_assembly(PrintFrameConverterAssembly);1023set_parsed_irreducible_loop(false);1024#endif1025set_has_irreducible_loop(false); // no loops10261027CompileWrapper cw(this);1028Init(/*AliasLevel=*/ 0);1029init_tf((*generator)());10301031{1032// The following is a dummy for the sake of GraphKit::gen_stub1033Unique_Node_List for_igvn(comp_arena());1034set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this1035PhaseGVN gvn(Thread::current()->resource_area(),255);1036set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively1037gvn.transform_no_reclaim(top());10381039GraphKit 
kit;1040kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);1041}10421043NOT_PRODUCT( verify_graph_edges(); )1044Code_Gen();1045if (failing()) return;104610471048// Entry point will be accessed using compile->stub_entry_point();1049if (code_buffer() == NULL) {1050Matcher::soft_match_failure();1051} else {1052if (PrintAssembly && (WizardMode || Verbose))1053tty->print_cr("### Stub::%s", stub_name);10541055if (!failing()) {1056assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");10571058// Make the NMethod1059// For now we mark the frame as never safe for profile stackwalking1060RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,1061code_buffer(),1062CodeOffsets::frame_never_safe,1063// _code_offsets.value(CodeOffsets::Frame_Complete),1064frame_size_in_words(),1065_oop_map_set,1066save_arg_registers);1067assert(rs != NULL && rs->is_runtime_stub(), "sanity check");10681069_stub_entry_point = rs->entry_point();1070}1071}1072}10731074//------------------------------Init-------------------------------------------1075// Prepare for a single compilation1076void Compile::Init(int aliaslevel) {1077_unique = 0;1078_regalloc = NULL;10791080_tf = NULL; // filled in later1081_top = NULL; // cached later1082_matcher = NULL; // filled in later1083_cfg = NULL; // filled in later10841085set_24_bit_selection_and_mode(Use24BitFP, false);10861087_node_note_array = NULL;1088_default_node_notes = NULL;10891090_immutable_memory = NULL; // filled in at first inquiry10911092// Globally visible Nodes1093// First set TOP to NULL to give safe behavior during creation of RootNode1094set_cached_top_node(NULL);1095set_root(new (this) RootNode());1096// Now that you have a Root to point to, create the real TOP1097set_cached_top_node( new (this) ConNode(Type::TOP) );1098set_recent_alloc(NULL, NULL);10991100// Create Debug Information Recorder to record scopes, oopmaps, etc.1101env()->set_oop_recorder(new OopRecorder(env()->arena()));1102env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));1103env()->set_dependencies(new Dependencies(env()));11041105_fixed_slots = 0;1106set_has_split_ifs(false);1107set_has_loops(has_method() && method()->has_loops()); // first approximation1108set_has_stringbuilder(false);1109set_has_boxed_value(false);1110_trap_can_recompile = false; // no traps emitted yet1111_major_progress = true; // start out assuming good things will happen1112set_has_unsafe_access(false);1113set_max_vector_size(0);1114Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));1115set_decompile_count(0);11161117set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));1118set_num_loop_opts(LoopOptsCount);1119set_do_inlining(Inline);1120set_max_inline_size(MaxInlineSize);1121set_freq_inline_size(FreqInlineSize);1122set_do_scheduling(OptoScheduling);1123set_do_count_invocations(false);1124set_do_method_data_update(false);1125set_rtm_state(NoRTM); // No RTM lock eliding by default1126method_has_option_value("MaxNodeLimit", _max_node_limit);1127#if INCLUDE_RTM_OPT1128if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {1129int rtm_state = method()->method_data()->rtm_state();1130if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {1131// Don't generate RTM lock eliding code.1132set_rtm_state(NoRTM);1133} else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {1134// Generate RTM lock eliding code without abort ratio calculation 
code.1135set_rtm_state(UseRTM);1136} else if (UseRTMDeopt) {1137// Generate RTM lock eliding code and include abort ratio calculation1138// code if UseRTMDeopt is on.1139set_rtm_state(ProfileRTM);1140}1141}1142#endif1143if (debug_info()->recording_non_safepoints()) {1144set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>1145(comp_arena(), 8, 0, NULL));1146set_default_node_notes(Node_Notes::make(this));1147}11481149// // -- Initialize types before each compile --1150// // Update cached type information1151// if( _method && _method->constants() )1152// Type::update_loaded_types(_method, _method->constants());11531154// Init alias_type map.1155if (!_do_escape_analysis && aliaslevel == 3)1156aliaslevel = 2; // No unique types without escape analysis1157_AliasLevel = aliaslevel;1158const int grow_ats = 16;1159_max_alias_types = grow_ats;1160_alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);1161AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);1162Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);1163{1164for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];1165}1166// Initialize the first few types.1167_alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);1168_alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);1169_alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);1170_num_alias_types = AliasIdxRaw+1;1171// Zero out the alias type cache.1172Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));1173// A NULL adr_type hits in the cache right away. Preload the right answer.1174probe_alias_cache(NULL)->_index = AliasIdxTop;11751176_intrinsics = NULL;1177_macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1178_predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1179_expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1180_range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8, 0, NULL);1181_shenandoah_barriers = new(comp_arena()) GrowableArray<ShenandoahLoadReferenceBarrierNode*>(comp_arena(), 8, 0, NULL);1182register_library_intrinsics();1183#ifdef ASSERT1184_type_verify_symmetry = true;1185#endif1186}11871188//---------------------------init_start----------------------------------------1189// Install the StartNode on this compile object.1190void Compile::init_start(StartNode* s) {1191if (failing())1192return; // already failing1193assert(s == start(), "");1194}11951196StartNode* Compile::start() const {1197assert(!failing(), "");1198for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {1199Node* start = root()->fast_out(i);1200if( start->is_Start() )1201return start->as_Start();1202}1203fatal("Did not find Start node!");1204return NULL;1205}12061207//-------------------------------immutable_memory-------------------------------------1208// Access immutable memory1209Node* Compile::immutable_memory() {1210if (_immutable_memory != NULL) {1211return _immutable_memory;1212}1213StartNode* s = start();1214for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {1215Node *p = s->fast_out(i);1216if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {1217_immutable_memory = p;1218return _immutable_memory;1219}1220}1221ShouldNotReachHere();1222return NULL;1223}12241225//----------------------set_cached_top_node------------------------------------1226// Install the cached top node, and make sure Node::is_top works correctly.1227void Compile::set_cached_top_node(Node* tn) {1228if (tn != NULL) 
verify_top(tn);1229Node* old_top = _top;1230_top = tn;1231// Calling Node::setup_is_top allows the nodes the chance to adjust1232// their _out arrays.1233if (_top != NULL) _top->setup_is_top();1234if (old_top != NULL) old_top->setup_is_top();1235assert(_top == NULL || top()->is_top(), "");1236}12371238#ifdef ASSERT1239uint Compile::count_live_nodes_by_graph_walk() {1240Unique_Node_List useful(comp_arena());1241// Get useful node list by walking the graph.1242identify_useful_nodes(useful);1243return useful.size();1244}12451246void Compile::print_missing_nodes() {12471248// Return if CompileLog is NULL and PrintIdealNodeCount is false.1249if ((_log == NULL) && (! PrintIdealNodeCount)) {1250return;1251}12521253// This is an expensive function. It is executed only when the user1254// specifies VerifyIdealNodeCount option or otherwise knows the1255// additional work that needs to be done to identify reachable nodes1256// by walking the flow graph and find the missing ones using1257// _dead_node_list.12581259Unique_Node_List useful(comp_arena());1260// Get useful node list by walking the graph.1261identify_useful_nodes(useful);12621263uint l_nodes = C->live_nodes();1264uint l_nodes_by_walk = useful.size();12651266if (l_nodes != l_nodes_by_walk) {1267if (_log != NULL) {1268_log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));1269_log->stamp();1270_log->end_head();1271}1272VectorSet& useful_member_set = useful.member_set();1273int last_idx = l_nodes_by_walk;1274for (int i = 0; i < last_idx; i++) {1275if (useful_member_set.test(i)) {1276if (_dead_node_list.test(i)) {1277if (_log != NULL) {1278_log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);1279}1280if (PrintIdealNodeCount) {1281// Print the log message to tty1282tty->print_cr("mismatched_node idx='%d' both live and dead'", i);1283useful.at(i)->dump();1284}1285}1286}1287else if (! 
_dead_node_list.test(i)) {1288if (_log != NULL) {1289_log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);1290}1291if (PrintIdealNodeCount) {1292// Print the log message to tty1293tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);1294}1295}1296}1297if (_log != NULL) {1298_log->tail("mismatched_nodes");1299}1300}1301}1302#endif13031304#ifndef PRODUCT1305void Compile::verify_top(Node* tn) const {1306if (tn != NULL) {1307assert(tn->is_Con(), "top node must be a constant");1308assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");1309assert(tn->in(0) != NULL, "must have live top node");1310}1311}1312#endif131313141315///-------------------Managing Per-Node Debug & Profile Info-------------------13161317void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {1318guarantee(arr != NULL, "");1319int num_blocks = arr->length();1320if (grow_by < num_blocks) grow_by = num_blocks;1321int num_notes = grow_by * _node_notes_block_size;1322Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);1323Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));1324while (num_notes > 0) {1325arr->append(notes);1326notes += _node_notes_block_size;1327num_notes -= _node_notes_block_size;1328}1329assert(num_notes == 0, "exact multiple, please");1330}13311332bool Compile::copy_node_notes_to(Node* dest, Node* source) {1333if (source == NULL || dest == NULL) return false;13341335if (dest->is_Con())1336return false; // Do not push debug info onto constants.13371338#ifdef ASSERT1339// Leave a bread crumb trail pointing to the original node:1340if (dest != NULL && dest != source && dest->debug_orig() == NULL) {1341dest->set_debug_orig(source);1342}1343#endif13441345if (node_note_array() == NULL)1346return false; // Not collecting any notes now.13471348// This is a copy onto a pre-existing node, which may already have notes.1349// If both nodes have notes, do not overwrite any pre-existing notes.1350Node_Notes* source_notes = node_notes_at(source->_idx);1351if (source_notes == NULL || source_notes->is_clear()) return false;1352Node_Notes* dest_notes = node_notes_at(dest->_idx);1353if (dest_notes == NULL || dest_notes->is_clear()) {1354return set_node_notes_at(dest->_idx, source_notes);1355}13561357Node_Notes merged_notes = (*source_notes);1358// The order of operations here ensures that dest notes will win...1359merged_notes.update_from(dest_notes);1360return set_node_notes_at(dest->_idx, &merged_notes);1361}136213631364//--------------------------allow_range_check_smearing-------------------------1365// Gating condition for coalescing similar range checks.1366// Sometimes we try 'speculatively' replacing a series of a range checks by a1367// single covering check that is at least as strong as any of them.1368// If the optimization succeeds, the simplified (strengthened) range check1369// will always succeed. 
If it fails, we will deopt, and then give up1370// on the optimization.1371bool Compile::allow_range_check_smearing() const {1372// If this method has already thrown a range-check,1373// assume it was because we already tried range smearing1374// and it failed.1375uint already_trapped = trap_count(Deoptimization::Reason_range_check);1376return !already_trapped;1377}137813791380//------------------------------flatten_alias_type-----------------------------1381const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {1382int offset = tj->offset();1383TypePtr::PTR ptr = tj->ptr();13841385// Known instance (scalarizable allocation) alias only with itself.1386bool is_known_inst = tj->isa_oopptr() != NULL &&1387tj->is_oopptr()->is_known_instance();13881389// Process weird unsafe references.1390if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {1391assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");1392assert(!is_known_inst, "scalarizable allocation should not have unsafe references");1393tj = TypeOopPtr::BOTTOM;1394ptr = tj->ptr();1395offset = tj->offset();1396}13971398// Array pointers need some flattening1399const TypeAryPtr *ta = tj->isa_aryptr();1400if (ta && ta->is_stable()) {1401// Erase stability property for alias analysis.1402tj = ta = ta->cast_to_stable(false);1403}1404if( ta && is_known_inst ) {1405if ( offset != Type::OffsetBot &&1406offset > arrayOopDesc::length_offset_in_bytes() ) {1407offset = Type::OffsetBot; // Flatten constant access into array body only1408tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());1409}1410} else if( ta && _AliasLevel >= 2 ) {1411// For arrays indexed by constant indices, we flatten the alias1412// space to include all of the array body. 
Only the header, klass1413// and array length can be accessed un-aliased.1414if( offset != Type::OffsetBot ) {1415if( ta->const_oop() ) { // MethodData* or Method*1416offset = Type::OffsetBot; // Flatten constant access into array body1417tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);1418} else if( offset == arrayOopDesc::length_offset_in_bytes() ) {1419// range is OK as-is.1420tj = ta = TypeAryPtr::RANGE;1421} else if( offset == oopDesc::klass_offset_in_bytes() ) {1422tj = TypeInstPtr::KLASS; // all klass loads look alike1423ta = TypeAryPtr::RANGE; // generic ignored junk1424ptr = TypePtr::BotPTR;1425} else if( offset == oopDesc::mark_offset_in_bytes() ) {1426tj = TypeInstPtr::MARK;1427ta = TypeAryPtr::RANGE; // generic ignored junk1428ptr = TypePtr::BotPTR;1429} else { // Random constant offset into array body1430offset = Type::OffsetBot; // Flatten constant access into array body1431tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);1432}1433}1434// Arrays of fixed size alias with arrays of unknown size.1435if (ta->size() != TypeInt::POS) {1436const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);1437tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);1438}1439// Arrays of known objects become arrays of unknown objects.1440if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {1441const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());1442tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);1443}1444if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {1445const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());1446tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);1447}1448// Arrays of bytes and of booleans both use 'bastore' and 'baload' so1449// cannot be distinguished by bytecode alone.1450if (ta->elem() == TypeInt::BOOL) {1451const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());1452ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);1453tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);1454}1455// During the 2nd round of IterGVN, NotNull castings are removed.1456// Make sure the Bottom and NotNull variants alias the same.1457// Also, make sure exact and non-exact variants alias the same.1458if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {1459tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);1460}1461}14621463// Oop pointers need some flattening1464const TypeInstPtr *to = tj->isa_instptr();1465if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {1466ciInstanceKlass *k = to->klass()->as_instance_klass();1467if( ptr == TypePtr::Constant ) {1468if (to->klass() != ciEnv::current()->Class_klass() ||1469offset < k->size_helper() * wordSize) {1470// No constant oop pointers (such as Strings); they alias with1471// unknown strings.1472assert(!is_known_inst, "not scalarizable allocation");1473tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);1474}1475} else if( is_known_inst ) {1476tj = to; // Keep NotNull and klass_is_exact for instance type1477} else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {1478// During the 2nd round of IterGVN, NotNull castings are removed.1479// Make sure the Bottom and NotNull variants alias the same.1480// Also, make sure exact and non-exact variants alias the same.1481tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);1482}1483if 
(to->speculative() != NULL) {1484tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());1485}1486// Canonicalize the holder of this field1487if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {1488// First handle header references such as a LoadKlassNode, even if the1489// object's klass is unloaded at compile time (4965979).1490if (!is_known_inst) { // Do it only for non-instance types1491tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);1492}1493} else if (offset < 0 || offset >= k->size_helper() * wordSize) {1494// Static fields are in the space above the normal instance1495// fields in the java.lang.Class instance.1496if (to->klass() != ciEnv::current()->Class_klass()) {1497to = NULL;1498tj = TypeOopPtr::BOTTOM;1499offset = tj->offset();1500}1501} else {1502ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);1503if (!k->equals(canonical_holder) || tj->offset() != offset) {1504if( is_known_inst ) {1505tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());1506} else {1507tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);1508}1509}1510}1511}15121513// Klass pointers to object array klasses need some flattening1514const TypeKlassPtr *tk = tj->isa_klassptr();1515if( tk ) {1516// If we are referencing a field within a Klass, we need1517// to assume the worst case of an Object. Both exact and1518// inexact types must flatten to the same alias class so1519// use NotNull as the PTR.1520if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {15211522tj = tk = TypeKlassPtr::make(TypePtr::NotNull,1523TypeKlassPtr::OBJECT->klass(),1524offset);1525}15261527ciKlass* klass = tk->klass();1528if( klass->is_obj_array_klass() ) {1529ciKlass* k = TypeAryPtr::OOPS->klass();1530if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs1531k = TypeInstPtr::BOTTOM->klass();1532tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );1533}15341535// Check for precise loads from the primary supertype array and force them1536// to the supertype cache alias index. Check for generic array loads from1537// the primary supertype array and also force them to the supertype cache1538// alias index. Since the same load can reach both, we need to merge1539// these 2 disparate memories into the same alias class. 
Since the1540// primary supertype array is read-only, there's no chance of confusion1541// where we bypass an array load and an array store.1542int primary_supers_offset = in_bytes(Klass::primary_supers_offset());1543if (offset == Type::OffsetBot ||1544(offset >= primary_supers_offset &&1545offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||1546offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {1547offset = in_bytes(Klass::secondary_super_cache_offset());1548tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );1549}1550}15511552// Flatten all Raw pointers together.1553if (tj->base() == Type::RawPtr)1554tj = TypeRawPtr::BOTTOM;15551556if (tj->base() == Type::AnyPtr)1557tj = TypePtr::BOTTOM; // An error, which the caller must check for.15581559// Flatten all to bottom for now1560switch( _AliasLevel ) {1561case 0:1562tj = TypePtr::BOTTOM;1563break;1564case 1: // Flatten to: oop, static, field or array1565switch (tj->base()) {1566//case Type::AryPtr: tj = TypeAryPtr::RANGE; break;1567case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break;1568case Type::AryPtr: // do not distinguish arrays at all1569case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break;1570case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;1571case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it1572default: ShouldNotReachHere();1573}1574break;1575case 2: // No collapsing at level 2; keep all splits1576case 3: // No collapsing at level 3; keep all splits1577break;1578default:1579Unimplemented();1580}15811582offset = tj->offset();1583assert( offset != Type::OffsetTop, "Offset has fallen from constant" );15841585assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||1586(offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||1587(offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||1588(offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||1589(offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||1590(offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||1591(offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) ,1592"For oops, klasses, raw offset must be constant; for arrays the offset is never known" );1593assert( tj->ptr() != TypePtr::TopPTR &&1594tj->ptr() != TypePtr::AnyNull &&1595tj->ptr() != TypePtr::Null, "No imprecise addresses" );1596// assert( tj->ptr() != TypePtr::Constant ||1597// tj->base() == Type::RawPtr ||1598// tj->base() == Type::KlassPtr, "No constant oop addresses" );15991600return tj;1601}16021603void Compile::AliasType::Init(int i, const TypePtr* at) {1604_index = i;1605_adr_type = at;1606_field = NULL;1607_element = NULL;1608_is_rewritable = true; // default1609const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;1610if (atoop != NULL && atoop->is_known_instance()) {1611const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);1612_general_index = Compile::current()->get_alias_index(gt);1613} else {1614_general_index = 0;1615}1616}16171618BasicType Compile::AliasType::basic_type() const {1619if (element() != NULL) {1620const Type* element = adr_type()->is_aryptr()->elem();1621return element->isa_narrowoop() ? 
T_OBJECT : element->array_element_basic_type();1622} if (field() != NULL) {1623return field()->layout_type();1624} else {1625return T_ILLEGAL; // unknown1626}1627}16281629//---------------------------------print_on------------------------------------1630#ifndef PRODUCT1631void Compile::AliasType::print_on(outputStream* st) {1632if (index() < 10)1633st->print("@ <%d> ", index());1634else st->print("@ <%d>", index());1635st->print(is_rewritable() ? " " : " RO");1636int offset = adr_type()->offset();1637if (offset == Type::OffsetBot)1638st->print(" +any");1639else st->print(" +%-3d", offset);1640st->print(" in ");1641adr_type()->dump_on(st);1642const TypeOopPtr* tjp = adr_type()->isa_oopptr();1643if (field() != NULL && tjp) {1644if (tjp->klass() != field()->holder() ||1645tjp->offset() != field()->offset_in_bytes()) {1646st->print(" != ");1647field()->print();1648st->print(" ***");1649}1650}1651}16521653void print_alias_types() {1654Compile* C = Compile::current();1655tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);1656for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {1657C->alias_type(idx)->print_on(tty);1658tty->cr();1659}1660}1661#endif166216631664//----------------------------probe_alias_cache--------------------------------1665Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {1666intptr_t key = (intptr_t) adr_type;1667key ^= key >> logAliasCacheSize;1668return &_alias_cache[key & right_n_bits(logAliasCacheSize)];1669}167016711672//-----------------------------grow_alias_types--------------------------------1673void Compile::grow_alias_types() {1674const int old_ats = _max_alias_types; // how many before?1675const int new_ats = old_ats; // how many more?1676const int grow_ats = old_ats+new_ats; // how many now?1677_max_alias_types = grow_ats;1678_alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);1679AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);1680Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);1681for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];1682}168316841685//--------------------------------find_alias_type------------------------------1686Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {1687if (_AliasLevel == 0)1688return alias_type(AliasIdxBot);16891690AliasCacheEntry* ace = probe_alias_cache(adr_type);1691if (ace->_adr_type == adr_type) {1692return alias_type(ace->_index);1693}16941695// Handle special cases.1696if (adr_type == NULL) return alias_type(AliasIdxTop);1697if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);16981699// Do it the slow way.1700const TypePtr* flat = flatten_alias_type(adr_type);17011702#ifdef ASSERT1703{1704ResourceMark rm;1705assert(flat == flatten_alias_type(flat),1706err_msg("not idempotent: adr_type = %s; flat = %s => %s", Type::str(adr_type),1707Type::str(flat), Type::str(flatten_alias_type(flat))));1708assert(flat != TypePtr::BOTTOM,1709err_msg("cannot alias-analyze an untyped ptr: adr_type = %s", Type::str(adr_type)));1710if (flat->isa_oopptr() && !flat->isa_klassptr()) {1711const TypeOopPtr* foop = flat->is_oopptr();1712// Scalarizable allocations have exact klass always.1713bool exact = !foop->klass_is_exact() || foop->is_known_instance();1714const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();1715assert(foop == flatten_alias_type(xoop),1716err_msg("exactness must not affect alias type: foop = %s; 
xoop = %s",1717Type::str(foop), Type::str(xoop)));1718}1719}1720#endif17211722int idx = AliasIdxTop;1723for (int i = 0; i < num_alias_types(); i++) {1724if (alias_type(i)->adr_type() == flat) {1725idx = i;1726break;1727}1728}17291730if (idx == AliasIdxTop) {1731if (no_create) return NULL;1732// Grow the array if necessary.1733if (_num_alias_types == _max_alias_types) grow_alias_types();1734// Add a new alias type.1735idx = _num_alias_types++;1736_alias_types[idx]->Init(idx, flat);1737if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false);1738if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false);1739if (flat->isa_instptr()) {1740if (flat->offset() == java_lang_Class::klass_offset_in_bytes()1741&& flat->is_instptr()->klass() == env()->Class_klass())1742alias_type(idx)->set_rewritable(false);1743}1744if (flat->isa_aryptr()) {1745#ifdef ASSERT1746const int header_size_min = arrayOopDesc::base_offset_in_bytes(T_BYTE);1747// (T_BYTE has the weakest alignment and size restrictions...)1748assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");1749#endif1750if (flat->offset() == TypePtr::OffsetBot) {1751alias_type(idx)->set_element(flat->is_aryptr()->elem());1752}1753}1754if (flat->isa_klassptr()) {1755if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))1756alias_type(idx)->set_rewritable(false);1757if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))1758alias_type(idx)->set_rewritable(false);1759if (flat->offset() == in_bytes(Klass::access_flags_offset()))1760alias_type(idx)->set_rewritable(false);1761if (flat->offset() == in_bytes(Klass::java_mirror_offset()))1762alias_type(idx)->set_rewritable(false);1763}1764// %%% (We would like to finalize JavaThread::threadObj_offset(),1765// but the base pointer type is not distinctive enough to identify1766// references into JavaThread.)17671768// Check for final fields.1769const TypeInstPtr* tinst = flat->isa_instptr();1770if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {1771ciField* field;1772if (tinst->const_oop() != NULL &&1773tinst->klass() == ciEnv::current()->Class_klass() &&1774tinst->offset() >= (tinst->klass()->as_instance_klass()->size_helper() * wordSize)) {1775// static field1776ciInstanceKlass* k = tinst->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();1777field = k->get_field_by_offset(tinst->offset(), true);1778} else {1779ciInstanceKlass *k = tinst->klass()->as_instance_klass();1780field = k->get_field_by_offset(tinst->offset(), false);1781}1782assert(field == NULL ||1783original_field == NULL ||1784(field->holder() == original_field->holder() &&1785field->offset() == original_field->offset() &&1786field->is_static() == original_field->is_static()), "wrong field?");1787// Set field() and is_rewritable() attributes.1788if (field != NULL) alias_type(idx)->set_field(field);1789}1790}17911792// Fill the cache for next time.1793ace->_adr_type = adr_type;1794ace->_index = idx;1795assert(alias_type(adr_type) == alias_type(idx), "type must be installed");17961797// Might as well try to fill the cache for the flattened version, too.1798AliasCacheEntry* face = probe_alias_cache(flat);1799if (face->_adr_type == NULL) {1800face->_adr_type = flat;1801face->_index = idx;1802assert(alias_type(flat) == alias_type(idx), "flat type must work too");1803}18041805return alias_type(idx);1806}180718081809Compile::AliasType* Compile::alias_type(ciField* field) {1810const TypeOopPtr* t;1811if (field->is_static())1812t = 
TypeInstPtr::make(field->holder()->java_mirror());1813else1814t = TypeOopPtr::make_from_klass_raw(field->holder());1815AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()), field);1816assert((field->is_final() || field->is_stable()) == !atp->is_rewritable(), "must get the rewritable bits correct");1817return atp;1818}181918201821//------------------------------have_alias_type--------------------------------1822bool Compile::have_alias_type(const TypePtr* adr_type) {1823AliasCacheEntry* ace = probe_alias_cache(adr_type);1824if (ace->_adr_type == adr_type) {1825return true;1826}18271828// Handle special cases.1829if (adr_type == NULL) return true;1830if (adr_type == TypePtr::BOTTOM) return true;18311832return find_alias_type(adr_type, true, NULL) != NULL;1833}18341835//-----------------------------must_alias--------------------------------------1836// True if all values of the given address type are in the given alias category.1837bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {1838if (alias_idx == AliasIdxBot) return true; // the universal category1839if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP1840if (alias_idx == AliasIdxTop) return false; // the empty category1841if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins18421843// the only remaining possible overlap is identity1844int adr_idx = get_alias_index(adr_type);1845assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");1846assert(adr_idx == alias_idx ||1847(alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM1848&& adr_type != TypeOopPtr::BOTTOM),1849"should not be testing for overlap with an unsafe pointer");1850return adr_idx == alias_idx;1851}18521853//------------------------------can_alias--------------------------------------1854// True if any values of the given address type are in the given alias category.1855bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {1856if (alias_idx == AliasIdxTop) return false; // the empty category1857if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP1858if (alias_idx == AliasIdxBot) return true; // the universal category1859if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins18601861// the only remaining possible overlap is identity1862int adr_idx = get_alias_index(adr_type);1863assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");1864return adr_idx == alias_idx;1865}1866186718681869//---------------------------pop_warm_call-------------------------------------1870WarmCallInfo* Compile::pop_warm_call() {1871WarmCallInfo* wci = _warm_calls;1872if (wci != NULL) _warm_calls = wci->remove_from(wci);1873return wci;1874}18751876//----------------------------Inline_Warm--------------------------------------1877int Compile::Inline_Warm() {1878// If there is room, try to inline some more warm call sites.1879// %%% Do a graph index compaction pass when we think we're out of space?1880if (!InlineWarmCalls) return 0;18811882int calls_made_hot = 0;1883int room_to_grow = NodeCountInliningCutoff - unique();1884int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);1885int amount_grown = 0;1886WarmCallInfo* call;1887while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {1888int est_size = (int)call->size();1889if (est_size > (room_to_grow - amount_grown)) {1890// This one won't fit anyway. 
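// [Editor's illustrative sketch -- not part of the original HotSpot source.]
// The surrounding loop in Compile::Inline_Warm hands out a fixed node
// budget: a warm call site is inlined ("made hot") only while its estimated
// size still fits in the remaining room, otherwise it is discarded ("made
// cold").  The toy below shows the same greedy budgeting over plain
// integers; ToyCall and the numbers are hypothetical, and the real code also
// caps the growth per pass.  Guarded with #if 0 so it never affects the build.
#if 0
#include <vector>

struct ToyCall {
  int  est_size;   // estimated node count if this call were inlined
  bool hot;        // decision: inline it or not
};

static int budget_warm_calls(std::vector<ToyCall>& calls, int room_to_grow) {
  int made_hot     = 0;
  int amount_grown = 0;
  for (size_t i = 0; i < calls.size(); i++) {
    ToyCall& c = calls[i];
    if (c.est_size > room_to_grow - amount_grown) {
      c.hot = false;                 // will not fit: drop it
      continue;
    }
    c.hot = true;                    // fits: spend part of the budget on it
    amount_grown += c.est_size;
    made_hot++;
  }
  return made_hot;
}
#endif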
Get rid of it.1891call->make_cold();1892continue;1893}1894call->make_hot();1895calls_made_hot++;1896amount_grown += est_size;1897amount_to_grow -= est_size;1898}18991900if (calls_made_hot > 0) set_major_progress();1901return calls_made_hot;1902}190319041905//----------------------------Finish_Warm--------------------------------------1906void Compile::Finish_Warm() {1907if (!InlineWarmCalls) return;1908if (failing()) return;1909if (warm_calls() == NULL) return;19101911// Clean up loose ends, if we are out of space for inlining.1912WarmCallInfo* call;1913while ((call = pop_warm_call()) != NULL) {1914call->make_cold();1915}1916}19171918//---------------------cleanup_loop_predicates-----------------------1919// Remove the opaque nodes that protect the predicates so that all unused1920// checks and uncommon_traps will be eliminated from the ideal graph1921void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {1922if (predicate_count()==0) return;1923for (int i = predicate_count(); i > 0; i--) {1924Node * n = predicate_opaque1_node(i-1);1925assert(n->Opcode() == Op_Opaque1, "must be");1926igvn.replace_node(n, n->in(1));1927}1928assert(predicate_count()==0, "should be clean!");1929}19301931void Compile::add_range_check_cast(Node* n) {1932assert(n->isa_CastII()->has_range_check(), "CastII should have range check dependency");1933assert(!_range_check_casts->contains(n), "duplicate entry in range check casts");1934_range_check_casts->append(n);1935}19361937// Remove all range check dependent CastIINodes.1938void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {1939for (int i = range_check_cast_count(); i > 0; i--) {1940Node* cast = range_check_cast_node(i-1);1941assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");1942igvn.replace_node(cast, cast->in(1));1943}1944assert(range_check_cast_count() == 0, "should be empty");1945}19461947// StringOpts and late inlining of string methods1948void Compile::inline_string_calls(bool parse_time) {1949{1950// remove useless nodes to make the usage analysis simpler1951ResourceMark rm;1952PhaseRemoveUseless pru(initial_gvn(), for_igvn());1953}19541955{1956ResourceMark rm;1957print_method(PHASE_BEFORE_STRINGOPTS, 3);1958PhaseStringOpts pso(initial_gvn(), for_igvn());1959print_method(PHASE_AFTER_STRINGOPTS, 3);1960}19611962// now inline anything that we skipped the first time around1963if (!parse_time) {1964_late_inlines_pos = _late_inlines.length();1965}19661967while (_string_late_inlines.length() > 0) {1968CallGenerator* cg = _string_late_inlines.pop();1969cg->do_late_inline();1970if (failing()) return;1971}1972_string_late_inlines.trunc_to(0);1973}19741975// Late inlining of boxing methods1976void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {1977if (_boxing_late_inlines.length() > 0) {1978assert(has_boxed_value(), "inconsistent");19791980PhaseGVN* gvn = initial_gvn();1981set_inlining_incrementally(true);19821983assert( igvn._worklist.size() == 0, "should be done with igvn" );1984for_igvn()->clear();1985gvn->replace_with(&igvn);19861987_late_inlines_pos = _late_inlines.length();19881989while (_boxing_late_inlines.length() > 0) {1990CallGenerator* cg = _boxing_late_inlines.pop();1991cg->do_late_inline();1992if (failing()) return;1993}1994_boxing_late_inlines.trunc_to(0);19951996{1997ResourceMark rm;1998PhaseRemoveUseless pru(gvn, for_igvn());1999}20002001igvn = PhaseIterGVN(gvn);2002igvn.optimize();20032004set_inlining_progress(false);2005set_inlining_incrementally(false);2006}2007}20082009void 
Compile::inline_incrementally_one(PhaseIterGVN& igvn) {2010assert(IncrementalInline, "incremental inlining should be on");2011PhaseGVN* gvn = initial_gvn();20122013set_inlining_progress(false);2014for_igvn()->clear();2015gvn->replace_with(&igvn);20162017int i = 0;20182019for (; i <_late_inlines.length() && !inlining_progress(); i++) {2020CallGenerator* cg = _late_inlines.at(i);2021_late_inlines_pos = i+1;2022cg->do_late_inline();2023if (failing()) return;2024}2025int j = 0;2026for (; i < _late_inlines.length(); i++, j++) {2027_late_inlines.at_put(j, _late_inlines.at(i));2028}2029_late_inlines.trunc_to(j);20302031{2032ResourceMark rm;2033PhaseRemoveUseless pru(gvn, for_igvn());2034}20352036igvn = PhaseIterGVN(gvn);2037}20382039// Perform incremental inlining until bound on number of live nodes is reached2040void Compile::inline_incrementally(PhaseIterGVN& igvn) {2041PhaseGVN* gvn = initial_gvn();20422043set_inlining_incrementally(true);2044set_inlining_progress(true);2045uint low_live_nodes = 0;20462047while(inlining_progress() && _late_inlines.length() > 0) {20482049if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {2050if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {2051// PhaseIdealLoop is expensive so we only try it once we are2052// out of live nodes and we only try it again if the previous2053// helped got the number of nodes down significantly2054PhaseIdealLoop ideal_loop( igvn, false, true );2055if (failing()) return;2056low_live_nodes = live_nodes();2057_major_progress = true;2058}20592060if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {2061break;2062}2063}20642065inline_incrementally_one(igvn);20662067if (failing()) return;20682069igvn.optimize();20702071if (failing()) return;2072}20732074assert( igvn._worklist.size() == 0, "should be done with igvn" );20752076if (_string_late_inlines.length() > 0) {2077assert(has_stringbuilder(), "inconsistent");2078for_igvn()->clear();2079initial_gvn()->replace_with(&igvn);20802081inline_string_calls(false);20822083if (failing()) return;20842085{2086ResourceMark rm;2087PhaseRemoveUseless pru(initial_gvn(), for_igvn());2088}20892090igvn = PhaseIterGVN(gvn);20912092igvn.optimize();2093}20942095set_inlining_incrementally(false);2096}209720982099// Remove edges from "root" to each SafePoint at a backward branch.2100// They were inserted during parsing (see add_safepoint()) to make2101// infinite loops without calls or exceptions visible to root, i.e.,2102// useful.2103void Compile::remove_root_to_sfpts_edges(PhaseIterGVN& igvn) {2104Node *r = root();2105if (r != NULL) {2106for (uint i = r->req(); i < r->len(); ++i) {2107Node *n = r->in(i);2108if (n != NULL && n->is_SafePoint()) {2109r->rm_prec(i);2110if (n->outcnt() == 0) {2111igvn.remove_dead_node(n);2112}2113--i;2114}2115}2116}2117}21182119//------------------------------Optimize---------------------------------------2120// Given a graph, optimize it.2121void Compile::Optimize() {2122TracePhase t1("optimizer", &_t_optimizer, true);21232124#ifndef PRODUCT2125if (env()->break_at_compile()) {2126BREAKPOINT;2127}21282129#endif21302131ResourceMark rm;2132int loop_opts_cnt;21332134NOT_PRODUCT( verify_graph_edges(); )21352136print_method(PHASE_AFTER_PARSING);21372138{2139// Iterative Global Value Numbering, including ideal transforms2140// Initialize IterGVN with types and values from parse-time GVN2141PhaseIterGVN igvn(initial_gvn());2142{2143NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )2144igvn.optimize();2145}21462147print_method(PHASE_ITER_GVN1, 
2);21482149if (failing()) return;21502151{2152NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )2153inline_incrementally(igvn);2154}21552156print_method(PHASE_INCREMENTAL_INLINE, 2);21572158if (failing()) return;21592160if (eliminate_boxing()) {2161NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )2162// Inline valueOf() methods now.2163inline_boxing_calls(igvn);21642165if (AlwaysIncrementalInline) {2166inline_incrementally(igvn);2167}21682169print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);21702171if (failing()) return;2172}21732174// Now that all inlining is over, cut edge from root to loop2175// safepoints2176remove_root_to_sfpts_edges(igvn);21772178// Remove the speculative part of types and clean up the graph from2179// the extra CastPP nodes whose only purpose is to carry them. Do2180// that early so that optimizations are not disrupted by the extra2181// CastPP nodes.2182remove_speculative_types(igvn);21832184// No more new expensive nodes will be added to the list from here2185// so keep only the actual candidates for optimizations.2186cleanup_expensive_nodes(igvn);21872188if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {2189NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)2190initial_gvn()->replace_with(&igvn);2191for_igvn()->clear();2192Unique_Node_List new_worklist(C->comp_arena());2193{2194ResourceMark rm;2195PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);2196}2197set_for_igvn(&new_worklist);2198igvn = PhaseIterGVN(initial_gvn());2199igvn.optimize();2200}22012202// Perform escape analysis2203if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {2204if (has_loops()) {2205// Cleanup graph (remove dead nodes).2206TracePhase t2("idealLoop", &_t_idealLoop, true);2207PhaseIdealLoop ideal_loop( igvn, false, true );2208if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);2209if (failing()) return;2210}2211ConnectionGraph::do_analysis(this, &igvn);22122213if (failing()) return;22142215// Optimize out fields loads from scalar replaceable allocations.2216igvn.optimize();2217print_method(PHASE_ITER_GVN_AFTER_EA, 2);22182219if (failing()) return;22202221if (congraph() != NULL && macro_count() > 0) {2222NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )2223PhaseMacroExpand mexp(igvn);2224mexp.eliminate_macro_nodes();2225igvn.set_delay_transform(false);22262227igvn.optimize();2228print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);22292230if (failing()) return;2231}2232}22332234// Loop transforms on the ideal graph. 
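    // [Editor's illustrative sketch -- not part of the original HotSpot source.]
    // The RenumberLiveNodes step above fires only when the number of live
    // nodes (plus some slack) is well below the highest index ever handed
    // out, and then gives the survivors fresh, dense indices.  The toy below
    // models just that decision and remapping; NODE_LIMIT_FUDGE and the
    // container types are simplifications, not PhaseRenumberLive.  Guarded
    // with #if 0 so it never affects the build.
#if 0
#include <map>
#include <vector>

static const int NODE_LIMIT_FUDGE = 2000;   // hypothetical slack, like NodeLimitFudgeFactor

// Renumber only when enough indices are dead to make it worthwhile.
static bool worth_renumbering(int live_nodes, int unique_ids) {
  return live_nodes + NODE_LIMIT_FUDGE < unique_ids;
}

// Hand every live (sparse) node id a fresh dense id in [0, live_ids.size()).
static std::map<int, int> renumber_live(const std::vector<int>& live_ids) {
  std::map<int, int> old2new;
  int next_id = 0;
  for (size_t i = 0; i < live_ids.size(); i++) {
    old2new[live_ids[i]] = next_id++;
  }
  return old2new;
}
#endif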
Range Check Elimination,2235// peeling, unrolling, etc.22362237// Set loop opts counter2238loop_opts_cnt = num_loop_opts();2239if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {2240{2241TracePhase t2("idealLoop", &_t_idealLoop, true);2242PhaseIdealLoop ideal_loop( igvn, true );2243loop_opts_cnt--;2244if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);2245if (failing()) return;2246}2247// Loop opts pass if partial peeling occurred in previous pass2248if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {2249TracePhase t3("idealLoop", &_t_idealLoop, true);2250PhaseIdealLoop ideal_loop( igvn, false );2251loop_opts_cnt--;2252if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);2253if (failing()) return;2254}2255// Loop opts pass for loop-unrolling before CCP2256if(major_progress() && (loop_opts_cnt > 0)) {2257TracePhase t4("idealLoop", &_t_idealLoop, true);2258PhaseIdealLoop ideal_loop( igvn, false );2259loop_opts_cnt--;2260if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);2261}2262if (!failing()) {2263// Verify that last round of loop opts produced a valid graph2264NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )2265PhaseIdealLoop::verify(igvn);2266}2267}2268if (failing()) return;22692270// Conditional Constant Propagation;2271PhaseCCP ccp( &igvn );2272assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");2273{2274TracePhase t2("ccp", &_t_ccp, true);2275ccp.do_transform();2276}2277print_method(PHASE_CPP1, 2);22782279assert( true, "Break here to ccp.dump_old2new_map()");22802281// Iterative Global Value Numbering, including ideal transforms2282{2283NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )2284igvn = ccp;2285igvn.optimize();2286}22872288print_method(PHASE_ITER_GVN2, 2);22892290if (failing()) return;22912292// Loop transforms on the ideal graph. Range Check Elimination,2293// peeling, unrolling, etc.2294if(loop_opts_cnt > 0) {2295debug_only( int cnt = 0; );2296while(major_progress() && (loop_opts_cnt > 0)) {2297TracePhase t2("idealLoop", &_t_idealLoop, true);2298assert( cnt++ < 40, "infinite cycle in loop optimization" );2299PhaseIdealLoop ideal_loop( igvn, true);2300loop_opts_cnt--;2301if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);2302if (failing()) return;2303}2304}23052306{2307// Verify that all previous optimizations produced a valid graph2308// at least to this point, even if no loop optimizations were done.2309NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )2310PhaseIdealLoop::verify(igvn);2311}23122313if (range_check_cast_count() > 0) {2314// No more loop optimizations. 
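    // [Editor's illustrative sketch -- not part of the original HotSpot source.]
    // The loop-optimization passes above are driven by the same pattern: run
    // the pass again only while it reports major progress and a fixed round
    // budget (loop_opts_cnt) remains, with a debug guard against a pass that
    // never settles.  A generic, hypothetical version of that driver:
    // guarded with #if 0 so it never affects the build.
#if 0
#include <cassert>

// `pass` is any callable returning true when it changed the graph this round.
template <typename Pass>
static int run_with_budget(Pass pass, int budget) {
  int  rounds   = 0;
  bool progress = true;
  while (progress && budget > 0) {
    assert(rounds < 40 && "loop optimization should have settled by now");
    progress = pass();
    budget--;
    rounds++;
  }
  return rounds;   // number of rounds actually executed
}
#endif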
Remove all range check dependent CastIINodes.2315C->remove_range_check_casts(igvn);2316igvn.optimize();2317}23182319#ifdef ASSERT2320if (UseShenandoahGC && ShenandoahVerifyOptoBarriers) {2321ShenandoahBarrierC2Support::verify(C->root());2322}2323#endif23242325{2326NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )2327PhaseMacroExpand mex(igvn);2328if (mex.expand_macro_nodes()) {2329assert(failing(), "must bail out w/ explicit message");2330return;2331}2332}23332334#if INCLUDE_ALL_GCS2335if (UseShenandoahGC) {2336ShenandoahBarrierC2Support::expand(this, igvn);2337}2338#endif23392340} // (End scope of igvn; run destructor if necessary for asserts.)23412342dump_inlining();2343// A method with only infinite loops has no edges entering loops from root2344{2345NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )2346if (final_graph_reshaping()) {2347assert(failing(), "must bail out w/ explicit message");2348return;2349}2350}23512352print_method(PHASE_OPTIMIZE_FINISHED, 2);2353}235423552356//------------------------------Code_Gen---------------------------------------2357// Given a graph, generate code for it2358void Compile::Code_Gen() {2359if (failing()) {2360return;2361}23622363// Perform instruction selection. You might think we could reclaim Matcher2364// memory PDQ, but actually the Matcher is used in generating spill code.2365// Internals of the Matcher (including some VectorSets) must remain live2366// for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage2367// set a bit in reclaimed memory.23682369// In debug mode can dump m._nodes.dump() for mapping of ideal to machine2370// nodes. Mapping is only valid at the root of each matched subtree.2371NOT_PRODUCT( verify_graph_edges(); )23722373Matcher matcher;2374_matcher = &matcher;2375{2376TracePhase t2("matcher", &_t_matcher, true);2377matcher.match();2378}2379// In debug mode can dump m._nodes.dump() for mapping of ideal to machine2380// nodes. Mapping is only valid at the root of each matched subtree.2381NOT_PRODUCT( verify_graph_edges(); )23822383// If you have too many nodes, or if matching has failed, bail out2384check_node_count(0, "out of nodes matching instructions");2385if (failing()) {2386return;2387}23882389// Build a proper-looking CFG2390PhaseCFG cfg(node_arena(), root(), matcher);2391_cfg = &cfg;2392{2393NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )2394bool success = cfg.do_global_code_motion();2395if (!success) {2396return;2397}23982399print_method(PHASE_GLOBAL_CODE_MOTION, 2);2400NOT_PRODUCT( verify_graph_edges(); )2401debug_only( cfg.verify(); )2402}24032404PhaseChaitin regalloc(unique(), cfg, matcher);2405_regalloc = ®alloc;2406{2407TracePhase t2("regalloc", &_t_registerAllocation, true);2408// Perform register allocation. After Chaitin, use-def chains are2409// no longer accurate (at spill code) and so must be ignored.2410// Node->LRG->reg mappings are still accurate.2411_regalloc->Register_Allocate();24122413// Bail out if the allocator builds too many nodes2414if (failing()) {2415return;2416}2417}24182419// Prior to register allocation we kept empty basic blocks in case the2420// the allocator needed a place to spill. After register allocation we2421// are not adding any new instructions. 
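// [Editor's illustrative sketch -- not part of the original HotSpot source.]
// The block-ordering step described in this comment deletes basic blocks
// that ended up empty once register allocation stops adding instructions.
// The toy below models that on a linear block layout where an empty block
// simply falls through to the next one: branch targets are retargeted to the
// next non-empty block and the empty blocks are dropped.  ToyBlock and the
// fall-through convention are assumptions of this model, not PhaseCFG.
// Guarded with #if 0 so it never affects the build.
#if 0
#include <vector>

struct ToyBlock {
  int num_instructions;
  int branch_target;     // index of a taken-branch successor, or -1 if none
};

static void toy_remove_empty_blocks(std::vector<ToyBlock>& blocks) {
  const int n = (int) blocks.size();
  // 1. For every old index, the first non-empty block at or after it.
  std::vector<int> forward(n + 1);
  forward[n] = n;
  for (int i = n - 1; i >= 0; i--) {
    forward[i] = (blocks[i].num_instructions == 0) ? forward[i + 1] : i;
  }
  // 2. New position of every surviving old index (-1 = falls off the end).
  std::vector<int> new_pos(n + 1, -1);
  int next = 0;
  for (int i = 0; i < n; i++) {
    if (blocks[i].num_instructions != 0) new_pos[i] = next++;
  }
  // 3. Copy survivors, retargeting branches through both maps.
  std::vector<ToyBlock> kept;
  for (int i = 0; i < n; i++) {
    if (blocks[i].num_instructions == 0) continue;
    ToyBlock b = blocks[i];
    if (b.branch_target >= 0) b.branch_target = new_pos[forward[b.branch_target]];
    kept.push_back(b);
  }
  blocks.swap(kept);
}
#endif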
If any basic block is empty, we2422// can now safely remove it.2423{2424NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )2425cfg.remove_empty_blocks();2426if (do_freq_based_layout()) {2427PhaseBlockLayout layout(cfg);2428} else {2429cfg.set_loop_alignment();2430}2431cfg.fixup_flow();2432}24332434// Apply peephole optimizations2435if( OptoPeephole ) {2436NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )2437PhasePeephole peep( _regalloc, cfg);2438peep.do_transform();2439}24402441// Do late expand if CPU requires this.2442if (Matcher::require_postalloc_expand) {2443NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));2444cfg.postalloc_expand(_regalloc);2445}24462447// Convert Nodes to instruction bits in a buffer2448{2449// %%%% workspace merge brought two timers together for one job2450TracePhase t2a("output", &_t_output, true);2451NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )2452Output();2453}24542455print_method(PHASE_FINAL_CODE);24562457// He's dead, Jim.2458_cfg = (PhaseCFG*)((intptr_t)0xdeadbeef);2459_regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);2460}246124622463//------------------------------dump_asm---------------------------------------2464// Dump formatted assembly2465#ifndef PRODUCT2466void Compile::dump_asm(int *pcs, uint pc_limit) {2467bool cut_short = false;2468tty->print_cr("#");2469tty->print("# "); _tf->dump(); tty->cr();2470tty->print_cr("#");24712472// For all blocks2473int pc = 0x0; // Program counter2474char starts_bundle = ' ';2475_regalloc->dump_frame();24762477Node *n = NULL;2478for (uint i = 0; i < _cfg->number_of_blocks(); i++) {2479if (VMThread::should_terminate()) {2480cut_short = true;2481break;2482}2483Block* block = _cfg->get_block(i);2484if (block->is_connector() && !Verbose) {2485continue;2486}2487n = block->head();2488if (pcs && n->_idx < pc_limit) {2489tty->print("%3.3x ", pcs[n->_idx]);2490} else {2491tty->print(" ");2492}2493block->dump_head(_cfg);2494if (block->is_connector()) {2495tty->print_cr(" # Empty connector block");2496} else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {2497tty->print_cr(" # Block is sole successor of call");2498}24992500// For all instructions2501Node *delay = NULL;2502for (uint j = 0; j < block->number_of_nodes(); j++) {2503if (VMThread::should_terminate()) {2504cut_short = true;2505break;2506}2507n = block->get_node(j);2508if (valid_bundle_info(n)) {2509Bundle* bundle = node_bundling(n);2510if (bundle->used_in_unconditional_delay()) {2511delay = n;2512continue;2513}2514if (bundle->starts_bundle()) {2515starts_bundle = '+';2516}2517}25182519if (WizardMode) {2520n->dump();2521}25222523if( !n->is_Region() && // Dont print in the Assembly2524!n->is_Phi() && // a few noisely useless nodes2525!n->is_Proj() &&2526!n->is_MachTemp() &&2527!n->is_SafePointScalarObject() &&2528!n->is_Catch() && // Would be nice to print exception table targets2529!n->is_MergeMem() && // Not very interesting2530!n->is_top() && // Debug info table constants2531!(n->is_Con() && !n->is_Mach())// Debug info table constants2532) {2533if (pcs && n->_idx < pc_limit)2534tty->print("%3.3x", pcs[n->_idx]);2535else2536tty->print(" ");2537tty->print(" %c ", starts_bundle);2538starts_bundle = ' ';2539tty->print("\t");2540n->format(_regalloc, tty);2541tty->cr();2542}25432544// If we have an instruction with a delay slot, and have seen a delay,2545// then back up and 
print it2546if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {2547assert(delay != NULL, "no unconditional delay instruction");2548if (WizardMode) delay->dump();25492550if (node_bundling(delay)->starts_bundle())2551starts_bundle = '+';2552if (pcs && n->_idx < pc_limit)2553tty->print("%3.3x", pcs[n->_idx]);2554else2555tty->print(" ");2556tty->print(" %c ", starts_bundle);2557starts_bundle = ' ';2558tty->print("\t");2559delay->format(_regalloc, tty);2560tty->cr();2561delay = NULL;2562}25632564// Dump the exception table as well2565if( n->is_Catch() && (Verbose || WizardMode) ) {2566// Print the exception table for this offset2567_handler_table.print_subtable_for(pc);2568}2569}25702571if (pcs && n->_idx < pc_limit)2572tty->print_cr("%3.3x", pcs[n->_idx]);2573else2574tty->cr();25752576assert(cut_short || delay == NULL, "no unconditional delay branch");25772578} // End of per-block dump2579tty->cr();25802581if (cut_short) tty->print_cr("*** disassembly is cut short ***");2582}2583#endif25842585//------------------------------Final_Reshape_Counts---------------------------2586// This class defines counters to help identify when a method2587// may/must be executed using hardware with only 24-bit precision.2588struct Final_Reshape_Counts : public StackObj {2589int _call_count; // count non-inlined 'common' calls2590int _float_count; // count float ops requiring 24-bit precision2591int _double_count; // count double ops requiring more precision2592int _java_call_count; // count non-inlined 'java' calls2593int _inner_loop_count; // count loops which need alignment2594VectorSet _visited; // Visitation flags2595Node_List _tests; // Set of IfNodes & PCTableNodes25962597Final_Reshape_Counts() :2598_call_count(0), _float_count(0), _double_count(0),2599_java_call_count(0), _inner_loop_count(0),2600_visited( Thread::current()->resource_area() ) { }26012602void inc_call_count () { _call_count ++; }2603void inc_float_count () { _float_count ++; }2604void inc_double_count() { _double_count++; }2605void inc_java_call_count() { _java_call_count++; }2606void inc_inner_loop_count() { _inner_loop_count++; }26072608int get_call_count () const { return _call_count ; }2609int get_float_count () const { return _float_count ; }2610int get_double_count() const { return _double_count; }2611int get_java_call_count() const { return _java_call_count; }2612int get_inner_loop_count() const { return _inner_loop_count; }2613};26142615#ifdef ASSERT2616static bool oop_offset_is_sane(const TypeInstPtr* tp) {2617ciInstanceKlass *k = tp->klass()->as_instance_klass();2618// Make sure the offset goes inside the instance layout.2619return k->contains_field_offset(tp->offset());2620// Note that OffsetBot and OffsetTop are very negative.2621}2622#endif26232624// Eliminate trivially redundant StoreCMs and accumulate their2625// precedence edges.2626void Compile::eliminate_redundant_card_marks(Node* n) {2627assert(n->Opcode() == Op_StoreCM, "expected StoreCM");2628if (n->in(MemNode::Address)->outcnt() > 1) {2629// There are multiple users of the same address so it might be2630// possible to eliminate some of the StoreCMs2631Node* mem = n->in(MemNode::Memory);2632Node* adr = n->in(MemNode::Address);2633Node* val = n->in(MemNode::ValueIn);2634Node* prev = n;2635bool done = false;2636// Walk the chain of StoreCMs eliminating ones that match. As2637// long as it's a chain of single users then the optimization is2638// safe. 
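// [Editor's illustrative sketch -- not part of the original HotSpot source.]
// The card-mark elimination described in this comment walks the memory
// chain of StoreCM nodes as long as each node has a single user, and splices
// out every store that duplicates the (address, value) pair of the head
// store.  The toy below shows that chain walk on a hypothetical ToyStoreCM
// struct; it is a simplified model, not the Node-based implementation.
// Guarded with #if 0 so it never affects the build.
#if 0
#include <cstddef>

struct ToyStoreCM {
  const void* adr;      // card address being marked
  int         val;      // value stored
  ToyStoreCM* mem;      // next older store on the memory chain
  int         outcnt;   // number of users; the walk stops unless it is 1
};

static void toy_eliminate_redundant_card_marks(ToyStoreCM* n) {
  ToyStoreCM* prev = n;
  ToyStoreCM* mem  = n->mem;
  while (mem != NULL && mem->outcnt == 1) {
    if (mem->adr == n->adr && mem->val == n->val) {
      prev->mem = mem->mem;          // splice the redundant store out
    } else {
      prev = mem;                    // keep it and continue from here
    }
    mem = prev->mem;
  }
}
#endif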
Eliminating partially redundant StoreCMs would require2639// cloning copies down the other paths.2640while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {2641if (adr == mem->in(MemNode::Address) &&2642val == mem->in(MemNode::ValueIn)) {2643// redundant StoreCM2644if (mem->req() > MemNode::OopStore) {2645// Hasn't been processed by this code yet.2646n->add_prec(mem->in(MemNode::OopStore));2647} else {2648// Already converted to precedence edge2649for (uint i = mem->req(); i < mem->len(); i++) {2650// Accumulate any precedence edges2651if (mem->in(i) != NULL) {2652n->add_prec(mem->in(i));2653}2654}2655// Everything above this point has been processed.2656done = true;2657}2658// Eliminate the previous StoreCM2659prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));2660assert(mem->outcnt() == 0, "should be dead");2661mem->disconnect_inputs(NULL, this);2662} else {2663prev = mem;2664}2665mem = prev->in(MemNode::Memory);2666}2667}2668}26692670//------------------------------final_graph_reshaping_impl----------------------2671// Implement items 1-5 from final_graph_reshaping below.2672void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {26732674if ( n->outcnt() == 0 ) return; // dead node2675uint nop = n->Opcode();26762677// Check for 2-input instruction with "last use" on right input.2678// Swap to left input. Implements item (2).2679if( n->req() == 3 && // two-input instruction2680n->in(1)->outcnt() > 1 && // left use is NOT a last use2681(!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop2682n->in(2)->outcnt() == 1 &&// right use IS a last use2683!n->in(2)->is_Con() ) { // right use is not a constant2684// Check for commutative opcode2685switch( nop ) {2686case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:2687case Op_MaxI: case Op_MinI:2688case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:2689case Op_AndL: case Op_XorL: case Op_OrL:2690case Op_AndI: case Op_XorI: case Op_OrI: {2691// Move "last use" input to left by swapping inputs2692n->swap_edges(1, 2);2693break;2694}2695default:2696break;2697}2698}26992700#ifdef ASSERT2701if( n->is_Mem() ) {2702int alias_idx = get_alias_index(n->as_Mem()->adr_type());2703assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||2704// oop will be recorded in oop map if load crosses safepoint2705n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||2706LoadNode::is_immutable_value(n->in(MemNode::Address))),2707"raw memory operations should have control edge");2708}2709if (n->is_MemBar()) {2710MemBarNode* mb = n->as_MemBar();2711if (mb->trailing_store() || mb->trailing_load_store()) {2712assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair");2713Node* mem = mb->in(MemBarNode::Precedent);2714assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) ||2715(mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op");2716} else if (mb->leading()) {2717assert(mb->trailing_membar()->leading_membar() == mb, "bad membar pair");2718}2719}2720#endif2721// Count FPU ops and common calls, implements item (3)2722switch( nop ) {2723// Count all float operations that may use FPU2724case Op_AddF:2725case Op_SubF:2726case Op_MulF:2727case Op_DivF:2728case Op_NegF:2729case Op_ModF:2730case Op_ConvI2F:2731case Op_ConF:2732case Op_CmpF:2733case Op_CmpF3:2734// case Op_ConvL2F: // longs are split into 32-bit halves2735frc.inc_float_count();2736break;27372738case Op_ConvF2D:2739case 
Op_ConvD2F:2740frc.inc_float_count();2741frc.inc_double_count();2742break;27432744// Count all double operations that may use FPU2745case Op_AddD:2746case Op_SubD:2747case Op_MulD:2748case Op_DivD:2749case Op_NegD:2750case Op_ModD:2751case Op_ConvI2D:2752case Op_ConvD2I:2753// case Op_ConvL2D: // handled by leaf call2754// case Op_ConvD2L: // handled by leaf call2755case Op_ConD:2756case Op_CmpD:2757case Op_CmpD3:2758frc.inc_double_count();2759break;2760case Op_Opaque1: // Remove Opaque Nodes before matching2761case Op_Opaque2: // Remove Opaque Nodes before matching2762case Op_Opaque3:2763n->subsume_by(n->in(1), this);2764break;2765case Op_CallStaticJava:2766case Op_CallJava:2767case Op_CallDynamicJava:2768frc.inc_java_call_count(); // Count java call site;2769case Op_CallRuntime:2770case Op_CallLeaf:2771case Op_CallLeafNoFP: {2772assert( n->is_Call(), "" );2773CallNode *call = n->as_Call();2774if (UseShenandoahGC && call->is_g1_wb_pre_call()) {2775uint cnt = OptoRuntime::g1_wb_pre_Type()->domain()->cnt();2776if (call->req() > cnt) {2777assert(call->req() == cnt+1, "only one extra input");2778Node* addp = call->in(cnt);2779assert(!CallLeafNode::has_only_g1_wb_pre_uses(addp), "useless address computation?");2780call->del_req(cnt);2781}2782}2783// Count call sites where the FP mode bit would have to be flipped.2784// Do not count uncommon runtime calls:2785// uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,2786// _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...2787if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {2788frc.inc_call_count(); // Count the call site2789} else { // See if uncommon argument is shared2790Node *n = call->in(TypeFunc::Parms);2791int nop = n->Opcode();2792// Clone shared simple arguments to uncommon calls, item (1).2793if( n->outcnt() > 1 &&2794!n->is_Proj() &&2795nop != Op_CreateEx &&2796nop != Op_CheckCastPP &&2797nop != Op_DecodeN &&2798nop != Op_DecodeNKlass &&2799!n->is_Mem() ) {2800Node *x = n->clone();2801call->set_req( TypeFunc::Parms, x );2802}2803}2804break;2805}28062807case Op_StoreD:2808case Op_LoadD:2809case Op_LoadD_unaligned:2810frc.inc_double_count();2811goto handle_mem;2812case Op_StoreF:2813case Op_LoadF:2814frc.inc_float_count();2815goto handle_mem;28162817case Op_StoreCM:2818{2819// Convert OopStore dependence into precedence edge2820Node* prec = n->in(MemNode::OopStore);2821n->del_req(MemNode::OopStore);2822n->add_prec(prec);2823eliminate_redundant_card_marks(n);2824}28252826// fall through28272828case Op_StoreB:2829case Op_StoreC:2830case Op_StorePConditional:2831case Op_StoreI:2832case Op_StoreL:2833case Op_StoreIConditional:2834case Op_StoreLConditional:2835case Op_CompareAndSwapI:2836case Op_CompareAndSwapL:2837case Op_CompareAndSwapP:2838case Op_CompareAndSwapN:2839case Op_GetAndAddI:2840case Op_GetAndAddL:2841case Op_GetAndSetI:2842case Op_GetAndSetL:2843case Op_GetAndSetP:2844case Op_GetAndSetN:2845case Op_StoreP:2846case Op_StoreN:2847case Op_StoreNKlass:2848case Op_LoadB:2849case Op_LoadUB:2850case Op_LoadUS:2851case Op_LoadI:2852case Op_LoadKlass:2853case Op_LoadNKlass:2854case Op_LoadL:2855case Op_LoadL_unaligned:2856case Op_LoadPLocked:2857case Op_LoadP:2858case Op_LoadN:2859case Op_LoadRange:2860case Op_LoadS: {2861handle_mem:2862#ifdef ASSERT2863if( VerifyOptoOopOffsets ) {2864assert( n->is_Mem(), "" );2865MemNode *mem = (MemNode*)n;2866// Check to see if address types have grounded out somehow.2867const TypeInstPtr *tp = 
mem->in(MemNode::Address)->bottom_type()->isa_instptr();2868assert( !tp || oop_offset_is_sane(tp), "" );2869}2870#endif2871break;2872}28732874case Op_AddP: { // Assert sane base pointers2875Node *addp = n->in(AddPNode::Address);2876assert( !addp->is_AddP() ||2877addp->in(AddPNode::Base)->is_top() || // Top OK for allocation2878addp->in(AddPNode::Base) == n->in(AddPNode::Base),2879"Base pointers must match" );2880#ifdef _LP642881if ((UseCompressedOops || UseCompressedClassPointers) &&2882addp->Opcode() == Op_ConP &&2883addp == n->in(AddPNode::Base) &&2884n->in(AddPNode::Offset)->is_Con()) {2885// Use addressing with narrow klass to load with offset on x86.2886// On sparc loading 32-bits constant and decoding it have less2887// instructions (4) then load 64-bits constant (7).2888// Do this transformation here since IGVN will convert ConN back to ConP.2889const Type* t = addp->bottom_type();2890if (t->isa_oopptr() || t->isa_klassptr()) {2891Node* nn = NULL;28922893int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass;28942895// Look for existing ConN node of the same exact type.2896Node* r = root();2897uint cnt = r->outcnt();2898for (uint i = 0; i < cnt; i++) {2899Node* m = r->raw_out(i);2900if (m!= NULL && m->Opcode() == op &&2901m->bottom_type()->make_ptr() == t) {2902nn = m;2903break;2904}2905}2906if (nn != NULL) {2907// Decode a narrow oop to match address2908// [R12 + narrow_oop_reg<<3 + offset]2909if (t->isa_oopptr()) {2910nn = new (this) DecodeNNode(nn, t);2911} else {2912nn = new (this) DecodeNKlassNode(nn, t);2913}2914n->set_req(AddPNode::Base, nn);2915n->set_req(AddPNode::Address, nn);2916if (addp->outcnt() == 0) {2917addp->disconnect_inputs(NULL, this);2918}2919}2920}2921}2922#endif2923break;2924}29252926case Op_CastPP: {2927// Remove CastPP nodes to gain more freedom during scheduling but2928// keep the dependency they encode as control or precedence edges2929// (if control is set already) on memory operations. 
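    // [Editor's illustrative sketch -- not part of the original HotSpot source.]
    // The Op_AddP case above only rewrites the address if an already
    // materialized ConN/ConNKlass of the exact same type is found among
    // root()'s outputs, which keeps narrow constants commoned.  The toy
    // below shows that reuse check; ToyCon and its type_id field are
    // hypothetical stand-ins for Node and bottom_type().  Guarded with #if 0
    // so it never affects the build.
#if 0
#include <cstddef>
#include <vector>

struct ToyCon {
  int type_id;   // stands in for the node's exact constant type
};

// Return an existing constant of the same type, or NULL so the caller can
// leave the original shape alone.
static ToyCon* find_existing_con(const std::vector<ToyCon*>& root_outputs, int type_id) {
  for (size_t i = 0; i < root_outputs.size(); i++) {
    if (root_outputs[i]->type_id == type_id) {
      return root_outputs[i];        // reuse the already-commoned constant
    }
  }
  return NULL;
}
#endif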
Some CastPP2930// nodes don't have a control (don't carry a dependency): skip2931// those.2932if (n->in(0) != NULL) {2933ResourceMark rm;2934Unique_Node_List wq;2935wq.push(n);2936for (uint next = 0; next < wq.size(); ++next) {2937Node *m = wq.at(next);2938for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {2939Node* use = m->fast_out(i);2940if (use->is_Mem() || use->is_EncodeNarrowPtr() || use->Opcode() == Op_ShenandoahLoadReferenceBarrier) {2941use->ensure_control_or_add_prec(n->in(0));2942} else if (use->in(0) == NULL) {2943switch(use->Opcode()) {2944case Op_AddP:2945case Op_DecodeN:2946case Op_DecodeNKlass:2947case Op_CheckCastPP:2948case Op_CastPP:2949wq.push(use);2950break;2951}2952}2953}2954}2955}2956const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);2957if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {2958Node* in1 = n->in(1);2959const Type* t = n->bottom_type();2960Node* new_in1 = in1->clone();2961new_in1->as_DecodeN()->set_type(t);29622963if (!Matcher::narrow_oop_use_complex_address()) {2964//2965// x86, ARM and friends can handle 2 adds in addressing mode2966// and Matcher can fold a DecodeN node into address by using2967// a narrow oop directly and do implicit NULL check in address:2968//2969// [R12 + narrow_oop_reg<<3 + offset]2970// NullCheck narrow_oop_reg2971//2972// On other platforms (Sparc) we have to keep new DecodeN node and2973// use it to do implicit NULL check in address:2974//2975// decode_not_null narrow_oop_reg, base_reg2976// [base_reg + offset]2977// NullCheck base_reg2978//2979// Pin the new DecodeN node to non-null path on these platform (Sparc)2980// to keep the information to which NULL check the new DecodeN node2981// corresponds to use it as value in implicit_null_check().2982//2983new_in1->set_req(0, n->in(0));2984}29852986n->subsume_by(new_in1, this);2987if (in1->outcnt() == 0) {2988in1->disconnect_inputs(NULL, this);2989}2990} else {2991n->subsume_by(n->in(1), this);2992if (n->outcnt() == 0) {2993n->disconnect_inputs(NULL, this);2994}2995}2996break;2997}2998#ifdef _LP642999case Op_CmpP:3000// Do this transformation here to preserve CmpPNode::sub() and3001// other TypePtr related Ideal optimizations (for example, ptr nullness).3002if (n->in(1)->is_DecodeNarrowPtr() || n->in(2)->is_DecodeNarrowPtr()) {3003Node* in1 = n->in(1);3004Node* in2 = n->in(2);3005if (!in1->is_DecodeNarrowPtr()) {3006in2 = in1;3007in1 = n->in(2);3008}3009assert(in1->is_DecodeNarrowPtr(), "sanity");30103011Node* new_in2 = NULL;3012if (in2->is_DecodeNarrowPtr()) {3013assert(in2->Opcode() == in1->Opcode(), "must be same node type");3014new_in2 = in2->in(1);3015} else if (in2->Opcode() == Op_ConP) {3016const Type* t = in2->bottom_type();3017if (t == TypePtr::NULL_PTR) {3018assert(in1->is_DecodeN(), "compare klass to null?");3019// Don't convert CmpP null check into CmpN if compressed3020// oops implicit null check is not generated.3021// This will allow to generate normal oop implicit null check.3022if (Matcher::gen_narrow_oop_implicit_null_checks())3023new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);3024//3025// This transformation together with CastPP transformation above3026// will generated code for implicit NULL checks for compressed oops.3027//3028// The original code after Optimize()3029//3030// LoadN memory, narrow_oop_reg3031// decode narrow_oop_reg, base_reg3032// CmpP base_reg, NULL3033// CastPP base_reg // NotNull3034// Load [base_reg + offset], val_reg3035//3036// after these transformations will 
be3037//3038// LoadN memory, narrow_oop_reg3039// CmpN narrow_oop_reg, NULL3040// decode_not_null narrow_oop_reg, base_reg3041// Load [base_reg + offset], val_reg3042//3043// and the uncommon path (== NULL) will use narrow_oop_reg directly3044// since narrow oops can be used in debug info now (see the code in3045// final_graph_reshaping_walk()).3046//3047// At the end the code will be matched to3048// on x86:3049//3050// Load_narrow_oop memory, narrow_oop_reg3051// Load [R12 + narrow_oop_reg<<3 + offset], val_reg3052// NullCheck narrow_oop_reg3053//3054// and on sparc:3055//3056// Load_narrow_oop memory, narrow_oop_reg3057// decode_not_null narrow_oop_reg, base_reg3058// Load [base_reg + offset], val_reg3059// NullCheck base_reg3060//3061} else if (t->isa_oopptr()) {3062new_in2 = ConNode::make(this, t->make_narrowoop());3063} else if (t->isa_klassptr()) {3064new_in2 = ConNode::make(this, t->make_narrowklass());3065}3066}3067if (new_in2 != NULL) {3068Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2);3069n->subsume_by(cmpN, this);3070if (in1->outcnt() == 0) {3071in1->disconnect_inputs(NULL, this);3072}3073if (in2->outcnt() == 0) {3074in2->disconnect_inputs(NULL, this);3075}3076}3077}3078break;30793080case Op_DecodeN:3081case Op_DecodeNKlass:3082assert(!n->in(1)->is_EncodeNarrowPtr(), "should be optimized out");3083// DecodeN could be pinned when it can't be fold into3084// an address expression, see the code for Op_CastPP above.3085assert(n->in(0) == NULL || (UseCompressedOops && !Matcher::narrow_oop_use_complex_address()), "no control");3086break;30873088case Op_EncodeP:3089case Op_EncodePKlass: {3090Node* in1 = n->in(1);3091if (in1->is_DecodeNarrowPtr()) {3092n->subsume_by(in1->in(1), this);3093} else if (in1->Opcode() == Op_ConP) {3094const Type* t = in1->bottom_type();3095if (t == TypePtr::NULL_PTR) {3096assert(t->isa_oopptr(), "null klass?");3097n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);3098} else if (t->isa_oopptr()) {3099n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);3100} else if (t->isa_klassptr()) {3101n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);3102}3103}3104if (in1->outcnt() == 0) {3105in1->disconnect_inputs(NULL, this);3106}3107break;3108}31093110case Op_Proj: {3111if (OptimizeStringConcat) {3112ProjNode* p = n->as_Proj();3113if (p->_is_io_use) {3114// Separate projections were used for the exception path which3115// are normally removed by a late inline. If it wasn't inlined3116// then they will hang around and should just be replaced with3117// the original one.3118Node* proj = NULL;3119// Replace with just one3120for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {3121Node *use = i.get();3122if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {3123proj = use;3124break;3125}3126}3127assert(proj != NULL || p->_con == TypeFunc::I_O, "io may be dropped at an infinite loop");3128if (proj != NULL) {3129p->subsume_by(proj, this);3130}3131}3132}3133break;3134}31353136case Op_Phi:3137if (n->as_Phi()->bottom_type()->isa_narrowoop() || n->as_Phi()->bottom_type()->isa_narrowklass()) {3138// The EncodeP optimization may create Phi with the same edges3139// for all paths. 
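    // [Editor's illustrative sketch -- not part of the original HotSpot source.]
    // The Op_Phi case handled just below replaces a Phi whose data inputs
    // are all the same node by that single input.  The toy below shows the
    // check on a hypothetical ToyNode (in[0] is the control input, data
    // inputs start at 1); it is a simplified model of the code that follows.
    // Guarded with #if 0 so it never affects the build.
#if 0
#include <cstddef>
#include <vector>

struct ToyNode {
  std::vector<ToyNode*> in;   // in[0] = control, data inputs from index 1
};

// Returns the unique data input if all of them are identical, else NULL.
static ToyNode* unique_phi_input(const ToyNode* phi) {
  if (phi->in.size() < 2) return NULL;
  ToyNode* unique_in = phi->in[1];
  for (size_t i = 2; i < phi->in.size(); i++) {
    if (phi->in[i] != unique_in) return NULL;   // inputs differ: keep the Phi
  }
  return unique_in;                             // safe to subsume the Phi
}
#endif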
It is not handled well by Register Allocator.3140Node* unique_in = n->in(1);3141assert(unique_in != NULL, "");3142uint cnt = n->req();3143for (uint i = 2; i < cnt; i++) {3144Node* m = n->in(i);3145assert(m != NULL, "");3146if (unique_in != m)3147unique_in = NULL;3148}3149if (unique_in != NULL) {3150n->subsume_by(unique_in, this);3151}3152}3153break;31543155#endif31563157#ifdef ASSERT3158case Op_CastII:3159// Verify that all range check dependent CastII nodes were removed.3160if (n->isa_CastII()->has_range_check()) {3161n->dump(3);3162assert(false, "Range check dependent CastII node was not removed");3163}3164break;3165#endif31663167case Op_ModI:3168if (UseDivMod) {3169// Check if a%b and a/b both exist3170Node* d = n->find_similar(Op_DivI);3171if (d) {3172// Replace them with a fused divmod if supported3173if (Matcher::has_match_rule(Op_DivModI)) {3174DivModINode* divmod = DivModINode::make(this, n);3175d->subsume_by(divmod->div_proj(), this);3176n->subsume_by(divmod->mod_proj(), this);3177} else {3178// replace a%b with a-((a/b)*b)3179Node* mult = new (this) MulINode(d, d->in(2));3180Node* sub = new (this) SubINode(d->in(1), mult);3181n->subsume_by(sub, this);3182}3183}3184}3185break;31863187case Op_ModL:3188if (UseDivMod) {3189// Check if a%b and a/b both exist3190Node* d = n->find_similar(Op_DivL);3191if (d) {3192// Replace them with a fused divmod if supported3193if (Matcher::has_match_rule(Op_DivModL)) {3194DivModLNode* divmod = DivModLNode::make(this, n);3195d->subsume_by(divmod->div_proj(), this);3196n->subsume_by(divmod->mod_proj(), this);3197} else {3198// replace a%b with a-((a/b)*b)3199Node* mult = new (this) MulLNode(d, d->in(2));3200Node* sub = new (this) SubLNode(d->in(1), mult);3201n->subsume_by(sub, this);3202}3203}3204}3205break;32063207case Op_LoadVector:3208case Op_StoreVector:3209break;32103211case Op_PackB:3212case Op_PackS:3213case Op_PackI:3214case Op_PackF:3215case Op_PackL:3216case Op_PackD:3217if (n->req()-1 > 2) {3218// Replace many operand PackNodes with a binary tree for matching3219PackNode* p = (PackNode*) n;3220Node* btp = p->binary_tree_pack(this, 1, n->req());3221n->subsume_by(btp, this);3222}3223break;3224case Op_Loop:3225case Op_CountedLoop:3226if (n->as_Loop()->is_inner_loop()) {3227frc.inc_inner_loop_count();3228}3229break;3230case Op_LShiftI:3231case Op_RShiftI:3232case Op_URShiftI:3233case Op_LShiftL:3234case Op_RShiftL:3235case Op_URShiftL:3236if (Matcher::need_masked_shift_count) {3237// The cpu's shift instructions don't restrict the count to the3238// lower 5/6 bits. We need to do the masking ourselves.3239Node* in2 = n->in(2);3240juint mask = (n->bottom_type() == TypeInt::INT) ? 
(BitsPerInt - 1) : (BitsPerLong - 1);3241const TypeInt* t = in2->find_int_type();3242if (t != NULL && t->is_con()) {3243juint shift = t->get_con();3244if (shift > mask) { // Unsigned cmp3245n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));3246}3247} else {3248if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {3249Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask)));3250n->set_req(2, shift);3251}3252}3253if (in2->outcnt() == 0) { // Remove dead node3254in2->disconnect_inputs(NULL, this);3255}3256}3257break;3258case Op_MemBarStoreStore:3259case Op_MemBarRelease:3260// Break the link with AllocateNode: it is no longer useful and3261// confuses register allocation.3262if (n->req() > MemBarNode::Precedent) {3263n->set_req(MemBarNode::Precedent, top());3264}3265break;3266case Op_ShenandoahLoadReferenceBarrier:3267assert(false, "should have been expanded already");3268break;3269default:3270assert( !n->is_Call(), "" );3271assert( !n->is_Mem(), "" );3272assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN");3273break;3274}32753276// Collect CFG split points3277if (n->is_MultiBranch())3278frc._tests.push(n);3279}32803281//------------------------------final_graph_reshaping_walk---------------------3282// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),3283// requires that the walk visits a node's inputs before visiting the node.3284void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {3285ResourceArea *area = Thread::current()->resource_area();3286Unique_Node_List sfpt(area);32873288frc._visited.set(root->_idx); // first, mark node as visited3289uint cnt = root->req();3290Node *n = root;3291uint i = 0;3292while (true) {3293if (i < cnt) {3294// Place all non-visited non-null inputs onto stack3295Node* m = n->in(i);3296++i;3297if (m != NULL && !frc._visited.test_set(m->_idx)) {3298if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {3299// compute worst case interpreter size in case of a deoptimization3300update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());33013302sfpt.push(m);3303}3304cnt = m->req();3305nstack.push(n, i); // put on stack parent and next input's index3306n = m;3307i = 0;3308}3309} else {3310// Now do post-visit work3311final_graph_reshaping_impl( n, frc );3312if (nstack.is_empty())3313break; // finished3314n = nstack.node(); // Get node from stack3315cnt = n->req();3316i = nstack.index();3317nstack.pop(); // Shift to the next node on stack3318}3319}33203321// Skip next transformation if compressed oops are not used.3322if ((UseCompressedOops && !Matcher::gen_narrow_oop_implicit_null_checks()) ||3323(!UseCompressedOops && !UseCompressedClassPointers))3324return;33253326// Go over safepoints nodes to skip DecodeN/DecodeNKlass nodes for debug edges.3327// It could be done for an uncommon traps or any safepoints/calls3328// if the DecodeN/DecodeNKlass node is referenced only in a debug info.3329while (sfpt.size() > 0) {3330n = sfpt.pop();3331JVMState *jvms = n->as_SafePoint()->jvms();3332assert(jvms != NULL, "sanity");3333int start = jvms->debug_start();3334int end = n->req();3335bool is_uncommon = (n->is_CallStaticJava() &&3336n->as_CallStaticJava()->uncommon_trap_request() != 0);3337for (int j = start; j < end; j++) {3338Node* in = n->in(j);3339if (in->is_DecodeNarrowPtr()) {3340bool safe_to_skip = true;3341if (!is_uncommon ) {3342// Is it safe to skip?3343for (uint i = 0; i < in->outcnt(); i++) {3344Node* u = 
in->raw_out(i);3345if (!u->is_SafePoint() ||3346u->is_Call() && u->as_Call()->has_non_debug_use(n)) {3347safe_to_skip = false;3348}3349}3350}3351if (safe_to_skip) {3352n->set_req(j, in->in(1));3353}3354if (in->outcnt() == 0) {3355in->disconnect_inputs(NULL, this);3356}3357}3358}3359}3360}33613362//------------------------------final_graph_reshaping--------------------------3363// Final Graph Reshaping.3364//3365// (1) Clone simple inputs to uncommon calls, so they can be scheduled late3366// and not commoned up and forced early. Must come after regular3367// optimizations to avoid GVN undoing the cloning. Clone constant3368// inputs to Loop Phis; these will be split by the allocator anyways.3369// Remove Opaque nodes.3370// (2) Move last-uses by commutative operations to the left input to encourage3371// Intel update-in-place two-address operations and better register usage3372// on RISCs. Must come after regular optimizations to avoid GVN Ideal3373// calls canonicalizing them back.3374// (3) Count the number of double-precision FP ops, single-precision FP ops3375// and call sites. On Intel, we can get correct rounding either by3376// forcing singles to memory (requires extra stores and loads after each3377// FP bytecode) or we can set a rounding mode bit (requires setting and3378// clearing the mode bit around call sites). The mode bit is only used3379// if the relative frequency of single FP ops to calls is low enough.3380// This is a key transform for SPEC mpeg_audio.3381// (4) Detect infinite loops; blobs of code reachable from above but not3382// below. Several of the Code_Gen algorithms fail on such code shapes,3383// so we simply bail out. Happens a lot in ZKM.jar, but also happens3384// from time to time in other codes (such as -Xcomp finalizer loops, etc).3385// Detection is by looking for IfNodes where only 1 projection is3386// reachable from below or CatchNodes missing some targets.3387// (5) Assert for insane oop offsets in debug mode.33883389bool Compile::final_graph_reshaping() {3390// an infinite loop may have been eliminated by the optimizer,3391// in which case the graph will be empty.3392if (root()->req() == 1) {3393record_method_not_compilable("trivial infinite loop");3394return true;3395}33963397// Expensive nodes have their control input set to prevent the GVN3398// from freely commoning them. There's no GVN beyond this point so3399// no need to keep the control input. We want the expensive nodes to3400// be freely moved to the least frequent code path by gcm.3401assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");3402for (int i = 0; i < expensive_count(); i++) {3403_expensive_nodes->at(i)->set_req(0, NULL);3404}34053406Final_Reshape_Counts frc;34073408// Visit everybody reachable!3409// Allocate stack of size C->live_nodes()/2 to avoid frequent realloc3410Node_Stack nstack(live_nodes() >> 1);3411final_graph_reshaping_walk(nstack, root(), frc);34123413// Check for unreachable (from below) code (i.e., infinite loops).3414for( uint i = 0; i < frc._tests.size(); i++ ) {3415MultiBranchNode *n = frc._tests[i]->as_MultiBranch();3416// Get number of CFG targets.3417// Note that PCTables include exception targets after calls.3418uint required_outcnt = n->required_outcnt();3419if (n->outcnt() != required_outcnt) {3420// Check for a few special cases. 
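// [Editor's illustrative sketch -- not part of the original HotSpot source.]
// final_graph_reshaping_walk above visits every input before its user by
// keeping an explicit (node, next-input-index) stack instead of recursing,
// so deep graphs cannot overflow the native stack.  The standalone model
// below shows that iterative post-order traversal; ToyNode, the visited flag
// and the visit callback are hypothetical simplifications.  Guarded with
// #if 0 so it never affects the build.
#if 0
#include <cstddef>
#include <utility>
#include <vector>

struct ToyNode {
  std::vector<ToyNode*> in;
  bool visited;
};

template <typename Visit>
static void postorder_walk(ToyNode* root, Visit visit) {
  std::vector<std::pair<ToyNode*, size_t> > stack;   // (node, next input index)
  root->visited = true;
  stack.push_back(std::make_pair(root, (size_t)0));
  while (!stack.empty()) {
    ToyNode* n = stack.back().first;
    size_t&  i = stack.back().second;
    if (i < n->in.size()) {
      ToyNode* m = n->in[i++];
      if (m != NULL && !m->visited) {
        m->visited = true;
        stack.push_back(std::make_pair(m, (size_t)0));   // descend into inputs first
      }
    } else {
      visit(n);              // all inputs done: post-visit the node
      stack.pop_back();
    }
  }
}
#endif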
bool Compile::final_graph_reshaping() {
  // an infinite loop may have been eliminated by the optimizer,
  // in which case the graph will be empty.
  if (root()->req() == 1) {
    record_method_not_compilable("trivial infinite loop");
    return true;
  }

  // Expensive nodes have their control input set to prevent the GVN
  // from freely commoning them. There's no GVN beyond this point so
  // no need to keep the control input. We want the expensive nodes to
  // be freely moved to the least frequent code path by gcm.
  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
  for (int i = 0; i < expensive_count(); i++) {
    _expensive_nodes->at(i)->set_req(0, NULL);
  }

  Final_Reshape_Counts frc;

  // Visit everybody reachable!
  // Allocate stack of size C->live_nodes()/2 to avoid frequent realloc
  Node_Stack nstack(live_nodes() >> 1);
  final_graph_reshaping_walk(nstack, root(), frc);

  // Check for unreachable (from below) code (i.e., infinite loops).
  for( uint i = 0; i < frc._tests.size(); i++ ) {
    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
    // Get number of CFG targets.
    // Note that PCTables include exception targets after calls.
    uint required_outcnt = n->required_outcnt();
    if (n->outcnt() != required_outcnt) {
      // Check for a few special cases.  Rethrow Nodes never take the
      // 'fall-thru' path, so expected kids is 1 less.
      if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
        if (n->in(0)->in(0)->is_Call()) {
          CallNode *call = n->in(0)->in(0)->as_Call();
          if (call->entry_point() == OptoRuntime::rethrow_stub()) {
            required_outcnt--;      // Rethrow always has 1 less kid
          } else if (call->req() > TypeFunc::Parms &&
                     call->is_CallDynamicJava()) {
            // Check for null receiver. In such case, the optimizer has
            // detected that the virtual call will always result in a null
            // pointer exception. The fall-through projection of this CatchNode
            // will not be populated.
            Node *arg0 = call->in(TypeFunc::Parms);
            if (arg0->is_Type() &&
                arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
              required_outcnt--;
            }
          } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
                     call->req() > TypeFunc::Parms+1 &&
                     call->is_CallStaticJava()) {
            // Check for negative array length. In such case, the optimizer has
            // detected that the allocation attempt will always result in an
            // exception. There is no fall-through projection of this CatchNode.
            Node *arg1 = call->in(TypeFunc::Parms+1);
            if (arg1->is_Type() &&
                arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
              required_outcnt--;
            }
          }
        }
      }
      // Recheck with a better notion of 'required_outcnt'
      if (n->outcnt() != required_outcnt) {
        record_method_not_compilable("malformed control flow");
        return true;            // Not all targets reachable!
      }
    }
    // Check that I actually visited all kids.  Unreached kids
    // must be infinite loops.
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
      if (!frc._visited.test(n->fast_out(j)->_idx)) {
        record_method_not_compilable("infinite loop");
        return true;            // Found unvisited kid; must be unreach
      }
  }

  // If original bytecodes contained a mixture of floats and doubles
  // check if the optimizer has made it homogeneous, item (3).
  if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
      frc.get_float_count() > 32 &&
      frc.get_double_count() == 0 &&
      (10 * frc.get_call_count() < frc.get_float_count()) ) {
    set_24_bit_selection_and_mode( false, true );
  }

  set_java_calls(frc.get_java_call_count());
  set_inner_loops(frc.get_inner_loop_count());

  // No infinite loops, no reason to bail out.
  return false;
}

//-----------------------------too_many_traps----------------------------------
// Report if there are too many traps at the current method and bci.
// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
bool Compile::too_many_traps(ciMethod* method,
                             int bci,
                             Deoptimization::DeoptReason reason) {
  ciMethodData* md = method->method_data();
  if (md->is_empty()) {
    // Assume the trap has not occurred, or that it occurred only
    // because of a transient condition during start-up in the interpreter.
    return false;
  }
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
  if (md->has_trap_at(bci, m, reason) != 0) {
    // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
    // Also, if there are multiple reasons, or if there is no per-BCI record,
    // assume the worst.
    if (log())
      log()->elem("observe trap='%s' count='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason));
    return true;
  } else {
    // Ignore method/bci and see if there have been too many globally.
    return too_many_traps(reason, md);
  }
}

// Less-accurate variant which does not require a method and bci.
bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
                             ciMethodData* logmd) {
  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
    // Too many traps globally.
    // Note that we use cumulative trap_count, not just md->trap_count.
    if (log()) {
      int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
      log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
                  Deoptimization::trap_reason_name(reason),
                  mcount, trap_count(reason));
    }
    return true;
  } else {
    // The coast is clear.
    return false;
  }
}

//--------------------------too_many_recompiles--------------------------------
// Report if there are too many recompiles at the current method and bci.
// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
// Is not eager to return true, since this will cause the compiler to use
// Action_none for a trap point, to avoid too many recompilations.
bool Compile::too_many_recompiles(ciMethod* method,
                                  int bci,
                                  Deoptimization::DeoptReason reason) {
  ciMethodData* md = method->method_data();
  if (md->is_empty()) {
    // Assume the trap has not occurred, or that it occurred only
    // because of a transient condition during start-up in the interpreter.
    return false;
  }
  // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
  uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
  uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
  Deoptimization::DeoptReason per_bc_reason
    = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
  if ((per_bc_reason == Deoptimization::Reason_none
       || md->has_trap_at(bci, m, reason) != 0)
      // The trap frequency measure we care about is the recompile count:
      && md->trap_recompiled_at(bci, m)
      && md->overflow_recompile_count() >= bc_cutoff) {
    // Do not emit a trap here if it has already caused recompilations.
    // Also, if there are multiple reasons, or if there is no per-BCI record,
    // assume the worst.
    if (log())
      log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason),
                  md->overflow_recompile_count());
    return true;
  } else if (trap_count(reason) != 0
             && decompile_count() >= m_cutoff) {
    // Too many recompiles globally, and we have seen this sort of trap.
    // Use cumulative decompile_count, not just md->decompile_count.
    if (log())
      log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
                  Deoptimization::trap_reason_name(reason),
                  md->trap_count(reason), trap_count(reason),
                  md->decompile_count(), decompile_count());
    return true;
  } else {
    // The coast is clear.
    return false;
  }
}

// Compute when not to trap.  Used by matching trap-based nodes and
// NullCheck optimization.
void Compile::set_allowed_deopt_reasons() {
  _allowed_reasons = 0;
  if (is_method_compilation()) {
    for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) {
      assert(rs < BitsPerInt, "recode bit map");
      if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
        _allowed_reasons |= nth_bit(rs);
      }
    }
  }
}

#ifndef PRODUCT
//------------------------------verify_graph_edges---------------------------
// Walk the Graph and verify that there is a one-to-one correspondence
// between Use-Def edges and Def-Use edges in the graph.
void Compile::verify_graph_edges(bool no_dead_code) {
  if (VerifyGraphEdges) {
    ResourceArea *area = Thread::current()->resource_area();
    Unique_Node_List visited(area);
    // Call recursive graph walk to check edges
    _root->verify_edges(visited);
    if (no_dead_code) {
      // Now make sure that no visited node is used by an unvisited node.
      bool dead_nodes = false;
      Unique_Node_List checked(area);
      while (visited.size() > 0) {
        Node* n = visited.pop();
        checked.push(n);
        for (uint i = 0; i < n->outcnt(); i++) {
          Node* use = n->raw_out(i);
          if (checked.member(use))  continue;  // already checked
          if (visited.member(use))  continue;  // already in the graph
          if (use->is_Con())        continue;  // a dead ConNode is OK
          // At this point, we have found a dead node which is DU-reachable.
          if (!dead_nodes) {
            tty->print_cr("*** Dead nodes reachable via DU edges:");
            dead_nodes = true;
          }
          use->dump(2);
          tty->print_cr("---");
          checked.push(use);  // No repeats; pretend it is now checked.
        }
      }
      assert(!dead_nodes, "using nodes must be reachable from root");
    }
  }
}

// Verify GC barrier consistency
// Currently supported:
// - G1 pre-barriers (see GraphKit::g1_write_barrier_pre())
void Compile::verify_barriers() {
  if (UseG1GC || UseShenandoahGC) {
    // Verify G1 pre-barriers
    const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active());

    ResourceArea *area = Thread::current()->resource_area();
    Unique_Node_List visited(area);
    Node_List worklist(area);
    // We're going to walk control flow backwards starting from the Root
    worklist.push(_root);
    while (worklist.size() > 0) {
      Node* x = worklist.pop();
      if (x == NULL || x == top()) continue;
      if (visited.member(x)) {
        continue;
      } else {
        visited.push(x);
      }

      if (x->is_Region()) {
        for (uint i = 1; i < x->req(); i++) {
          worklist.push(x->in(i));
        }
      } else {
        worklist.push(x->in(0));
        // We are looking for the pattern:
        //    /->ThreadLocal
        // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
        //    \->ConI(0)
        // We want to verify that the If and the LoadB have the same control
        // See GraphKit::g1_write_barrier_pre()
        if (x->is_If()) {
          IfNode *iff = x->as_If();
          if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
            CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
            if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
                && cmp->in(1)->is_Load()) {
              LoadNode* load = cmp->in(1)->as_Load();
              if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
                  && load->in(2)->in(3)->is_Con()
                  && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {

                Node* if_ctrl = iff->in(0);
                Node* load_ctrl = load->in(0);

                if (if_ctrl != load_ctrl) {
                  // Skip possible CProj->NeverBranch in infinite loops
                  if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
                      && (if_ctrl->in(0)->is_MultiBranch() && if_ctrl->in(0)->Opcode() == Op_NeverBranch)) {
                    if_ctrl = if_ctrl->in(0)->in(0);
                  }
                }
                assert(load_ctrl != NULL && if_ctrl == load_ctrl, "controls must match");
              }
            }
          }
        }
      }
    }
  }
}

#endif

// The Compile object keeps track of failure reasons separately from the ciEnv.
// This is required because there is not quite a 1-1 relation between the
// ciEnv and its compilation task and the Compile object.  Note that one
// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
// to backtrack and retry without subsuming loads.  Other than this backtracking
// behavior, the Compile's failure reason is quietly copied up to the ciEnv
// by the logic in C2Compiler.
void Compile::record_failure(const char* reason) {
  if (log() != NULL) {
    log()->elem("failure reason='%s' phase='compile'", reason);
  }
  if (_failure_reason == NULL) {
    // Record the first failure reason.
    _failure_reason = reason;
  }

  if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
    C->print_method(PHASE_FAILURE);
  }
  _root = NULL;  // flush the graph, too
}

Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
    _phase_name(name), _dolog(dolog)
{
  if (dolog) {
    C = Compile::current();
    _log = C->log();
  } else {
    C = NULL;
    _log = NULL;
  }
  if (_log != NULL) {
    _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
    _log->stamp();
    _log->end_head();
  }
}

Compile::TracePhase::~TracePhase() {

  C = Compile::current();
  if (_dolog) {
    _log = C->log();
  } else {
    _log = NULL;
  }

#ifdef ASSERT
  if (PrintIdealNodeCount) {
    tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'",
                  _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk());
  }

  if (VerifyIdealNodeCount) {
    Compile::current()->print_missing_nodes();
  }
#endif

  if (_log != NULL) {
    _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
  }
}

//=============================================================================
// Two Constants are equal when the type and the value are equal.
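// (Note: for T_FLOAT and T_DOUBLE the comparison below is done on the raw bit
// pattern rather than with ==, so +0.0 and -0.0 stay distinct table entries
// and two constants holding the same NaN bits compare equal.)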
bool Compile::Constant::operator==(const Constant& other) {
  if (type()          != other.type()         )  return false;
  if (can_be_reused() != other.can_be_reused())  return false;
  // For floating point values we compare the bit pattern.
  switch (type()) {
  case T_FLOAT:    return (_v._value.i == other._v._value.i);
  case T_LONG:
  case T_DOUBLE:   return (_v._value.j == other._v._value.j);
  case T_OBJECT:
  case T_ADDRESS:  return (_v._value.l == other._v._value.l);
  case T_VOID:     return (_v._value.l == other._v._value.l);  // jump-table entries
  case T_METADATA: return (_v._metadata == other._v._metadata);
  default: ShouldNotReachHere();
  }
  return false;
}

static int type_to_size_in_bytes(BasicType t) {
  switch (t) {
  case T_LONG:     return sizeof(jlong  );
  case T_FLOAT:    return sizeof(jfloat );
  case T_DOUBLE:   return sizeof(jdouble);
  case T_METADATA: return sizeof(Metadata*);
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
  case T_VOID:
  case T_ADDRESS:
  case T_OBJECT:   return sizeof(jobject);
  }

  ShouldNotReachHere();
  return -1;
}

int Compile::ConstantTable::qsort_comparator(Constant* a, Constant* b) {
  // sort descending
  if (a->freq() > b->freq())  return -1;
  if (a->freq() < b->freq())  return  1;
  return 0;
}
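// Assign an offset to every constant (the array is sorted by descending
// frequency first) and compute the total table size.  Each offset is aligned
// to the constant's own size: e.g. a jfloat placed after an 8-byte jlong
// lands at offset 8, and a jdouble following that jfloat is aligned up from
// 12 to 16.  Jump-table entries reserve one word per jump target.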
void Compile::ConstantTable::calculate_offsets_and_size() {
  // First, sort the array by frequencies.
  _constants.sort(qsort_comparator);

#ifdef ASSERT
  // Make sure all jump-table entries were sorted to the end of the
  // array (they have a negative frequency).
  bool found_void = false;
  for (int i = 0; i < _constants.length(); i++) {
    Constant con = _constants.at(i);
    if (con.type() == T_VOID)
      found_void = true;  // jump-tables
    else
      assert(!found_void, "wrong sorting");
  }
#endif

  int offset = 0;
  for (int i = 0; i < _constants.length(); i++) {
    Constant* con = _constants.adr_at(i);

    // Align offset for type.
    int typesize = type_to_size_in_bytes(con->type());
    offset = align_size_up(offset, typesize);
    con->set_offset(offset);   // set constant's offset

    if (con->type() == T_VOID) {
      MachConstantNode* n = (MachConstantNode*) con->get_jobject();
      offset = offset + typesize * n->outcnt();  // expand jump-table
    } else {
      offset = offset + typesize;
    }
  }

  // Align size up to the next section start (which is insts; see
  // CodeBuffer::align_at_start).
  assert(_size == -1, "already set?");
  _size = align_size_up(offset, CodeEntryAlignment);
}

void Compile::ConstantTable::emit(CodeBuffer& cb) {
  MacroAssembler _masm(&cb);
  for (int i = 0; i < _constants.length(); i++) {
    Constant con = _constants.at(i);
    address constant_addr = NULL;
    switch (con.type()) {
    case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
    case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
    case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
    case T_OBJECT: {
      jobject obj = con.get_jobject();
      int oop_index = _masm.oop_recorder()->find_index(obj);
      constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
      break;
    }
    case T_ADDRESS: {
      address addr = (address) con.get_jobject();
      constant_addr = _masm.address_constant(addr);
      break;
    }
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
    case T_VOID: {
      MachConstantNode* n = (MachConstantNode*) con.get_jobject();
      // Fill the jump-table with a dummy word.  The real value is
      // filled in later in fill_jump_table.
      address dummy = (address) n;
      constant_addr = _masm.address_constant(dummy);
      // Expand jump-table
      for (uint i = 1; i < n->outcnt(); i++) {
        address temp_addr = _masm.address_constant(dummy + i);
        assert(temp_addr, "consts section too small");
      }
      break;
    }
    case T_METADATA: {
      Metadata* obj = con.get_metadata();
      int metadata_index = _masm.oop_recorder()->find_index(obj);
      constant_addr = _masm.address_constant((address) obj, metadata_Relocation::spec(metadata_index));
      break;
    }
    default: ShouldNotReachHere();
    }
    assert(constant_addr, "consts section too small");
    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
           err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
  }
}

int Compile::ConstantTable::find_offset(Constant& con) const {
  int idx = _constants.find(con);
  assert(idx != -1, "constant must be in constant table");
  int offset = _constants.at(idx).offset();
  assert(offset != -1, "constant table not emitted yet?");
  return offset;
}

void Compile::ConstantTable::add(Constant& con) {
  if (con.can_be_reused()) {
    int idx = _constants.find(con);
    if (idx != -1 && _constants.at(idx).can_be_reused()) {
      _constants.adr_at(idx)->inc_freq(con.freq());  // increase the frequency by the current value
      return;
    }
  }
  (void) _constants.append(con);
}

Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
  Block* b = Compile::current()->cfg()->get_block_for_node(n);
  Constant con(type, value, b->_freq);
  add(con);
  return con;
}

Compile::Constant Compile::ConstantTable::add(Metadata* metadata) {
  Constant con(metadata);
  add(con);
  return con;
}

Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, MachOper* oper) {
  jvalue value;
  BasicType type = oper->type()->basic_type();
  switch (type) {
  case T_LONG:    value.j = oper->constantL(); break;
  case T_FLOAT:   value.f = oper->constantF(); break;
  case T_DOUBLE:  value.d = oper->constantD(); break;
  case T_OBJECT:
  case T_ADDRESS: value.l = (jobject) oper->constant(); break;
  case T_METADATA: return add((Metadata*)oper->constant()); break;
  default: guarantee(false, err_msg_res("unhandled type: %s", type2name(type)));
  }
  return add(n, type, value);
}

Compile::Constant Compile::ConstantTable::add_jump_table(MachConstantNode* n) {
  jvalue value;
  // We can use the node pointer here to identify the right jump-table
  // as this method is called from Compile::Fill_buffer right before
  // the MachNodes are emitted and the jump-table is filled (means the
  // MachNode pointers do not change anymore).
  value.l = (jobject) n;
  Constant con(T_VOID, value, next_jump_table_freq(), false);  // Labels of a jump-table cannot be reused.
  add(con);
  return con;
}

void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const {
  // If called from Compile::scratch_emit_size do nothing.
  if (Compile::current()->in_scratch_emit_size())  return;

  assert(labels.is_nonempty(), "must be");
  assert((uint) labels.length() == n->outcnt(), err_msg_res("must be equal: %d == %d", labels.length(), n->outcnt()));

  // Since MachConstantNode::constant_offset() also contains
  // table_base_offset() we need to subtract the table_base_offset()
  // to get the plain offset into the constant table.
  int offset = n->constant_offset() - table_base_offset();

  MacroAssembler _masm(&cb);
  address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);

  for (uint i = 0; i < n->outcnt(); i++) {
    address* constant_addr = &jump_table_base[i];
    assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i)));
    *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
    cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
  }
}

void Compile::dump_inlining() {
  if (print_inlining() || print_intrinsics()) {
    // Print inlining message for candidates that we couldn't inline
    // for lack of space or non-constant receiver
    for (int i = 0; i < _late_inlines.length(); i++) {
      CallGenerator* cg = _late_inlines.at(i);
      cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");
    }
    Unique_Node_List useful;
    useful.push(root());
    for (uint next = 0; next < useful.size(); ++next) {
      Node* n  = useful.at(next);
      if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) {
        CallNode* call = n->as_Call();
        CallGenerator* cg = call->generator();
        cg->print_inlining_late("receiver not constant");
      }
      uint max = n->len();
      for ( uint i = 0; i < max; ++i ) {
        Node *m = n->in(i);
        if ( m == NULL ) continue;
        useful.push(m);
      }
    }
    for (int i = 0; i < _print_inlining_list->length(); i++) {
      tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());
    }
  }
}

// Dump inlining replay data to the stream.
// Don't change thread state or acquire any locks.
void Compile::dump_inline_data(outputStream* out) {
  InlineTree* inl_tree = ilt();
  if (inl_tree != NULL) {
    out->print(" inline %d", inl_tree->count());
    inl_tree->dump_replay_data(out);
  }
}

int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
  if (n1->Opcode() < n2->Opcode())      return -1;
  else if (n1->Opcode() > n2->Opcode()) return 1;

  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
  for (uint i = 1; i < n1->req(); i++) {
    if (n1->in(i) < n2->in(i))      return -1;
    else if (n1->in(i) > n2->in(i)) return 1;
  }

  return 0;
}

int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
  Node* n1 = *n1p;
  Node* n2 = *n2p;

  return cmp_expensive_nodes(n1, n2);
}

void Compile::sort_expensive_nodes() {
  if (!expensive_nodes_sorted()) {
    _expensive_nodes->sort(cmp_expensive_nodes);
  }
}

bool Compile::expensive_nodes_sorted() const {
  for (int i = 1; i < _expensive_nodes->length(); i++) {
    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
      return false;
    }
  }
  return true;
}

bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
  if (_expensive_nodes->length() == 0) {
    return false;
  }

  assert(OptimizeExpensiveOps, "optimization off?");

  // Take this opportunity to remove dead nodes from the list
  int j = 0;
  for (int i = 0; i < _expensive_nodes->length(); i++) {
    Node* n = _expensive_nodes->at(i);
    if (!n->is_unreachable(igvn)) {
      assert(n->is_expensive(), "should be expensive");
      _expensive_nodes->at_put(j, n);
      j++;
    }
  }
  _expensive_nodes->trunc_to(j);

  // Then sort the list so that similar nodes are next to each other
  // and check for at least two nodes of identical kind with same data
  // inputs.
  sort_expensive_nodes();

  for (int i = 0; i < _expensive_nodes->length()-1; i++) {
    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {
      return true;
    }
  }

  return false;
}

void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
  if (_expensive_nodes->length() == 0) {
    return;
  }

  assert(OptimizeExpensiveOps, "optimization off?");

  // Sort to bring similar nodes next to each other and clear the
  // control input of nodes for which there's only a single copy.
  sort_expensive_nodes();

  int j = 0;
  int identical = 0;
  int i = 0;
  for (; i < _expensive_nodes->length()-1; i++) {
    assert(j <= i, "can't write beyond current index");
    if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
      identical++;
      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
      continue;
    }
    if (identical > 0) {
      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
      identical = 0;
    } else {
      Node* n = _expensive_nodes->at(i);
      igvn.hash_delete(n);
      n->set_req(0, NULL);
      igvn.hash_insert(n);
    }
  }
  if (identical > 0) {
    _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
  } else if (_expensive_nodes->length() >= 1) {
    Node* n = _expensive_nodes->at(i);
    igvn.hash_delete(n);
    n->set_req(0, NULL);
    igvn.hash_insert(n);
  }
  _expensive_nodes->trunc_to(j);
}

void Compile::add_expensive_node(Node * n) {
  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
  assert(n->is_expensive(), "expensive nodes with non-null control here only");
  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");
  if (OptimizeExpensiveOps) {
    _expensive_nodes->append(n);
  } else {
    // Clear control input and let IGVN optimize expensive nodes if
    // OptimizeExpensiveOps is off.
    n->set_req(0, NULL);
  }
}

/**
 * Remove the speculative part of types and clean up the graph
 */
void Compile::remove_speculative_types(PhaseIterGVN &igvn) {
  if (UseTypeSpeculation) {
    Unique_Node_List worklist;
    worklist.push(root());
    int modified = 0;
    // Go over all type nodes that carry a speculative type, drop the
    // speculative part of the type and enqueue the node for an igvn
    // which may optimize it out.
    for (uint next = 0; next < worklist.size(); ++next) {
      Node *n  = worklist.at(next);
      if (n->is_Type()) {
        TypeNode* tn = n->as_Type();
        const Type* t = tn->type();
        const Type* t_no_spec = t->remove_speculative();
        if (t_no_spec != t) {
          bool in_hash = igvn.hash_delete(n);
          assert(in_hash || n->hash() == Node::NO_HASH, "node should be in igvn hash table");
          tn->set_type(t_no_spec);
          igvn.hash_insert(n);
          igvn._worklist.push(n); // give it a chance to go away
          modified++;
        }
      }
      uint max = n->len();
      for( uint i = 0; i < max; ++i ) {
        Node *m = n->in(i);
        if (not_a_node(m))  continue;
        worklist.push(m);
      }
    }
    // Drop the speculative part of all types in the igvn's type table
    igvn.remove_speculative_types();
    if (modified > 0) {
      igvn.optimize();
    }
#ifdef ASSERT
    // Verify that after the IGVN is over no speculative type has resurfaced
    worklist.clear();
    worklist.push(root());
    for (uint next = 0; next < worklist.size(); ++next) {
      Node *n  = worklist.at(next);
      const Type* t = igvn.type_or_null(n);
      assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");
      if (n->is_Type()) {
        t = n->as_Type()->type();
        assert(t == t->remove_speculative(), "no more speculative types");
      }
      uint max = n->len();
      for( uint i = 0; i < max; ++i ) {
        Node *m = n->in(i);
        if (not_a_node(m))  continue;
        worklist.push(m);
      }
    }
    igvn.check_no_speculative_types();
#endif
  }
}

// Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)
Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl) {
  if (ctrl != NULL) {
    // Express control dependency by a CastII node with a narrow type.
    value = new (phase->C) CastIINode(value, itype, false, true /* range check dependency */);
    // Make the CastII node dependent on the control input to prevent the narrowed ConvI2L
    // node from floating above the range check during loop optimizations. Otherwise, the
    // ConvI2L node may be eliminated independently of the range check, causing the data path
    // to become TOP while the control path is still there (although it's unreachable).
    value->set_req(0, ctrl);
    // Save CastII node to remove it after loop optimizations.
    phase->C->add_range_check_cast(value);
    value = phase->transform(value);
  }
  const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);
  return phase->transform(new (phase->C) ConvI2LNode(value, ltype));
}
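// Example use of constrained_convI2L (hypothetical caller, for illustration
// only): given an index `idx` known to lie in [0, max] and the true
// projection `rc_proj` of its range check,
//   Node* long_idx = C->constrained_convI2L(&gvn, idx, TypeInt::make(0, max, Type::WidenMin), rc_proj);
// yields a ConvI2L fed by a CastII that cannot be hoisted above the check.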

// Auxiliary method to support randomized stressing/fuzzing.
//
// This method can be called an arbitrary number of times, with the current
// count as the argument. The logic allows selecting a single candidate from
// the running list of candidates as follows:
//    int count = 0;
//    Cand* selected = null;
//    while(cand = cand->next()) {
//      if (randomized_select(++count)) {
//        selected = cand;
//      }
//    }
//
// Including the count equalizes the chances that any candidate is "selected".
// This is useful when we don't have the complete list of candidates to choose
// from uniformly. In this case, we need to adjust the randomness of the
// selection, or else we will end up biasing the selection towards the latter
// candidates.
//
// A quick back-of-the-envelope calculation shows that for a list of n
// candidates, equal probability for a candidate to persist as "best" can be
// achieved by replacing it with the k-th "next" candidate with probability
// 1/k. It can easily be shown that by the end of the run the probability for
// any candidate converges to 1/n, giving a uniform distribution among all
// the candidates.
//
// We don't care about the domain size as long as (RANDOMIZED_DOMAIN / count) is large.
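//
// (Worked out: the candidate seen at position i must be selected at step i,
// with probability 1/i, and then survive every later replacement, with
// probability (i/(i+1)) * ((i+1)/(i+2)) * ... * ((n-1)/n) = i/n; the product
// (1/i) * (i/n) telescopes to 1/n for every position i.)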
#define RANDOMIZED_DOMAIN_POW 29
#define RANDOMIZED_DOMAIN (1 << RANDOMIZED_DOMAIN_POW)
#define RANDOMIZED_DOMAIN_MASK ((1 << (RANDOMIZED_DOMAIN_POW + 1)) - 1)
bool Compile::randomized_select(int count) {
  assert(count > 0, "only positive");
  return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);
}

void Compile::shenandoah_eliminate_g1_wb_pre(Node* call, PhaseIterGVN* igvn) {
  assert(UseShenandoahGC && call->is_g1_wb_pre_call(), "");
  Node* c = call->as_Call()->proj_out(TypeFunc::Control);
  c = c->unique_ctrl_out();
  assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
  c = c->unique_ctrl_out();
  assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
  Node* iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0);
  assert(iff->is_If(), "expect test");
  if (!iff->is_shenandoah_marking_if(igvn)) {
    c = c->unique_ctrl_out();
    assert(c->is_Region() && c->req() == 3, "where's the pre barrier control flow?");
    iff = c->in(1)->is_IfProj() ? c->in(1)->in(0) : c->in(2)->in(0);
    assert(iff->is_shenandoah_marking_if(igvn), "expect marking test");
  }
  Node* cmpx = iff->in(1)->in(1);
  igvn->replace_node(cmpx, igvn->makecon(TypeInt::CC_EQ));
  igvn->rehash_node_delayed(call);
  call->del_req(call->req()-1);
}