CoCalc -- compile.cpp

GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/opto/compile.cpp
³²²⁸⁵ views
1
/*
2
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
 *
5
 * This code is free software; you can redistribute it and/or modify it
6
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.
8
 *
9
 * This code is distributed in the hope that it will be useful, but WITHOUT
10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12
 * version 2 for more details (a copy is included in the LICENSE file that
13
 * accompanied this code).
14
 *
15
 * You should have received a copy of the GNU General Public License version
16
 * 2 along with this work; if not, write to the Free Software Foundation,
17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
 *
19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
 * or visit www.oracle.com if you need additional information or have any
21
 * questions.
22
 *
23
 */
24

25
#include "precompiled.hpp"
26
#include "asm/macroAssembler.hpp"
27
#include "asm/macroAssembler.inline.hpp"
28
#include "ci/ciReplay.hpp"
29
#include "classfile/systemDictionary.hpp"
30
#include "code/exceptionHandlerTable.hpp"
31
#include "code/nmethod.hpp"
32
#include "compiler/compileLog.hpp"
33
#include "compiler/disassembler.hpp"
34
#include "compiler/oopMap.hpp"
35
#include "jfr/jfrEvents.hpp"
36
#include "opto/addnode.hpp"
37
#include "opto/block.hpp"
38
#include "opto/c2compiler.hpp"
39
#include "opto/callGenerator.hpp"
40
#include "opto/callnode.hpp"
41
#include "opto/cfgnode.hpp"
42
#include "opto/chaitin.hpp"
43
#include "opto/compile.hpp"
44
#include "opto/connode.hpp"
45
#include "opto/divnode.hpp"
46
#include "opto/escape.hpp"
47
#include "opto/idealGraphPrinter.hpp"
48
#include "opto/loopnode.hpp"
49
#include "opto/machnode.hpp"
50
#include "opto/macro.hpp"
51
#include "opto/matcher.hpp"
52
#include "opto/mathexactnode.hpp"
53
#include "opto/memnode.hpp"
54
#include "opto/mulnode.hpp"
55
#include "opto/node.hpp"
56
#include "opto/opcodes.hpp"
57
#include "opto/output.hpp"
58
#include "opto/parse.hpp"
59
#include "opto/phaseX.hpp"
60
#include "opto/rootnode.hpp"
61
#include "opto/runtime.hpp"
62
#include "opto/stringopts.hpp"
63
#include "opto/type.hpp"
64
#include "opto/vectornode.hpp"
65
#include "runtime/arguments.hpp"
66
#include "runtime/signature.hpp"
67
#include "runtime/stubRoutines.hpp"
68
#include "runtime/timer.hpp"
69
#include "utilities/copy.hpp"
70
#if defined AD_MD_HPP
71
# include AD_MD_HPP
72
#elif defined TARGET_ARCH_MODEL_x86_32
73
# include "adfiles/ad_x86_32.hpp"
74
#elif defined TARGET_ARCH_MODEL_x86_64
75
# include "adfiles/ad_x86_64.hpp"
76
#elif defined TARGET_ARCH_MODEL_aarch64
77
# include "adfiles/ad_aarch64.hpp"
78
#elif defined TARGET_ARCH_MODEL_sparc
79
# include "adfiles/ad_sparc.hpp"
80
#elif defined TARGET_ARCH_MODEL_zero
81
# include "adfiles/ad_zero.hpp"
82
#elif defined TARGET_ARCH_MODEL_ppc_64
83
# include "adfiles/ad_ppc_64.hpp"
84
#endif
85

86
#if INCLUDE_ALL_GCS
87
#include "gc_implementation/shenandoah/shenandoahForwarding.hpp"
88
#include "gc_implementation/shenandoah/c2/shenandoahSupport.hpp"
89
#endif
90

91
// -------------------- Compile::mach_constant_base_node -----------------------
92
// Constant table base node singleton.
93
MachConstantBaseNode* Compile::mach_constant_base_node() {
94
  if (_mach_constant_base_node == NULL) {
95
    _mach_constant_base_node = new (C) MachConstantBaseNode();
96
    _mach_constant_base_node->add_req(C->root());
97
  }
98
  return _mach_constant_base_node;
99
}
100

101

102
/// Support for intrinsics.
103

104
// Return the index at which m must be inserted (or already exists).
105
// The sort order is by the address of the ciMethod, with is_virtual as minor key.
106
int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
107
#ifdef ASSERT
108
  for (int i = 1; i < _intrinsics->length(); i++) {
109
    CallGenerator* cg1 = _intrinsics->at(i-1);
110
    CallGenerator* cg2 = _intrinsics->at(i);
111
    assert(cg1->method() != cg2->method()
112
           ? cg1->method()     < cg2->method()
113
           : cg1->is_virtual() < cg2->is_virtual(),
114
           "compiler intrinsics list must stay sorted");
115
  }
116
#endif
117
  // Binary search sorted list, in decreasing intervals [lo, hi].
118
  int lo = 0, hi = _intrinsics->length()-1;
119
  while (lo <= hi) {
120
    int mid = (uint)(hi + lo) / 2;
121
    ciMethod* mid_m = _intrinsics->at(mid)->method();
122
    if (m < mid_m) {
123
      hi = mid-1;
124
    } else if (m > mid_m) {
125
      lo = mid+1;
126
    } else {
127
      // look at minor sort key
128
      bool mid_virt = _intrinsics->at(mid)->is_virtual();
129
      if (is_virtual < mid_virt) {
130
        hi = mid-1;
131
      } else if (is_virtual > mid_virt) {
132
        lo = mid+1;
133
      } else {
134
        return mid;  // exact match
135
      }
136
    }
137
  }
138
  return lo;  // inexact match
139
}
140

141
// Add cg to the sorted _intrinsics list, creating the list on first use.
// The same (method, is_virtual) pair must not be registered twice.
void Compile::register_intrinsic(CallGenerator* cg) {
  if (_intrinsics == NULL) {
    _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
  }
  // This code is stolen from ciObjectFactory::insert.
  // Really, GrowableArray should have methods for
  // insert_at, remove_at, and binary_search.
  const int count = _intrinsics->length();
  const int slot  = intrinsic_insertion_index(cg->method(), cg->is_virtual());
  if (slot < count) {
#ifdef ASSERT
    CallGenerator* occupant = _intrinsics->at(slot);
    assert(occupant->method() != cg->method() || occupant->is_virtual() != cg->is_virtual(), "don't register twice");
#endif
    // Grow by one element (duplicating the tail), slide [slot, count-2]
    // one position to the right, then drop cg into the freed slot.
    _intrinsics->append(_intrinsics->at(count - 1));
    for (int from = count - 2; from >= slot; from--) {
      _intrinsics->at_put(from + 1, _intrinsics->at(from));
    }
    _intrinsics->at_put(slot, cg);
  } else {
    // Sorts after everything already present.
    _intrinsics->append(cg);
  }
  assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
}
166

167
// Look up the intrinsic call generator for (m, is_virtual).
// Returns a previously registered generator if there is one. Otherwise, for
// intrinsic IDs up to LAST_COMPILER_INLINE, asks make_vm_intrinsic() for a
// generator and registers it for later lookups. Returns NULL when no
// generator is available.
CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
  assert(m->is_loaded(), "don't try this on unloaded methods");
  if (_intrinsics != NULL) {
    const int slot = intrinsic_insertion_index(m, is_virtual);
    if (slot < _intrinsics->length()) {
      CallGenerator* hit = _intrinsics->at(slot);
      if (hit->method() == m && hit->is_virtual() == is_virtual) {
        return hit;
      }
    }
  }
  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
  const vmIntrinsics::ID id = m->intrinsic_id();
  if (id == vmIntrinsics::_none || id > vmIntrinsics::LAST_COMPILER_INLINE) {
    return NULL;
  }
  CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
  if (cg == NULL) {
    // No generator could be made: record the intrinsic as disabled.
    gather_intrinsic_statistics(id, is_virtual, _intrinsic_disabled);
    return NULL;
  }
  // Save it for next time:
  register_intrinsic(cg);
  return cg;
}
191

192
// Compile::register_library_intrinsics and Compile::make_vm_intrinsic are
// defined in library_call.cpp.
194

195

196
#ifndef PRODUCT
197
// statistics gathering...
198

199
juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
200
jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
201

202
// Fold one observation about intrinsic 'id' into the usage histogram.
//   id         - the intrinsic; must lie strictly between _none and ID_LIMIT
//   is_virtual - whether the observed call site was a virtual call
//   flags      - non-zero combination of _intrinsic_* bits for this observation
// Returns true when the recorded flags for 'id' differ from the observation,
// or when this is the first successful (_intrinsic_worked) use of 'id'.
bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
  assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
  assert(flags != 0, "what happened?");
  const int recorded = _intrinsic_hist_flags[id];
  int observed = flags;
  if (is_virtual) {
    observed |= _intrinsic_virtual;
  }
  bool changed = (observed != recorded);
  if ((observed & _intrinsic_worked) != 0) {
    const juint hits = (_intrinsic_hist_count[id] += 1);
    if (hits == 1) {
      changed = true;           // first time
    }
    // increment the overall count also:
    _intrinsic_hist_count[vmIntrinsics::_none] += 1;
  }
  if (changed) {
    const bool virt_now    = (observed & _intrinsic_virtual) != 0;
    const bool virt_before = (recorded & _intrinsic_virtual) != 0;
    if (virt_now != virt_before) {
      // Something changed about the intrinsic's virtuality.
      bool seen_both_ways;
      if (virt_now) {
        // First use as a virtual call: it counts as "both" only if the
        // intrinsic had already been recorded (as a non-virtual) before.
        seen_both_ways = (recorded != 0);
      } else {
        // First use as a non-virtual call after virtual ones.
        seen_both_ways = ((recorded & _intrinsic_both) == 0);
      }
      if (seen_both_ways) {
        observed |= _intrinsic_both;
      }
    }
    _intrinsic_hist_flags[id] = (jubyte) (recorded | observed);
  }
  // update the overall flags also:
  _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) observed;
  return changed;
}
238

239
static char* format_flags(int flags, char* buf) {
240
  buf[0] = 0;
241
  if ((flags & Compile::_intrinsic_worked) != 0)    strcat(buf, ",worked");
242
  if ((flags & Compile::_intrinsic_failed) != 0)    strcat(buf, ",failed");
243
  if ((flags & Compile::_intrinsic_disabled) != 0)  strcat(buf, ",disabled");
244
  if ((flags & Compile::_intrinsic_virtual) != 0)   strcat(buf, ",virtual");
245
  if ((flags & Compile::_intrinsic_both) != 0)      strcat(buf, ",nonvirtual");
246
  if (buf[0] == 0)  strcat(buf, ",");
247
  assert(buf[0] == ',', "must be");
248
  return &buf[1];
249
}
250

251
void Compile::print_intrinsic_statistics() {
252
  char flagsbuf[100];
253
  ttyLocker ttyl;
254
  if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
255
  tty->print_cr("Compiler intrinsic usage:");
256
  juint total = _intrinsic_hist_count[vmIntrinsics::_none];
257
  if (total == 0)  total = 1;  // avoid div0 in case of no successes
258
  #define PRINT_STAT_LINE(name, c, f) \
259
    tty->print_cr("  %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
260
  for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
261
    vmIntrinsics::ID id = (vmIntrinsics::ID) index;
262
    int   flags = _intrinsic_hist_flags[id];
263
    juint count = _intrinsic_hist_count[id];
264
    if ((flags | count) != 0) {
265
      PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
266
    }
267
  }
268
  PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
269
  if (xtty != NULL)  xtty->tail("statistics");
270
}
271

272
void Compile::print_statistics() {
273
  { ttyLocker ttyl;
274
    if (xtty != NULL)  xtty->head("statistics type='opto'");
275
    Parse::print_statistics();
276
    PhaseCCP::print_statistics();
277
    PhaseRegAlloc::print_statistics();
278
    Scheduling::print_statistics();
279
    PhasePeephole::print_statistics();
280
    PhaseIdealLoop::print_statistics();
281
    if (xtty != NULL)  xtty->tail("statistics");
282
  }
283
  if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
284
    // put this under its own <statistics> element.
285
    print_intrinsic_statistics();
286
  }
287
}
288
#endif //PRODUCT
289

290
// Support for bundling info
291
// Return the bundling record of node n, i.e. the entry at index n->_idx of
// the bundling table. The index must be within the table (checked in debug
// builds via valid_bundle_info()).
Bundle* Compile::node_bundling(const Node *n) {
  assert(valid_bundle_info(n), "oob");
  Bundle* const table = _node_bundling_base;
  return table + n->_idx;
}
295

296
bool Compile::valid_bundle_info(const Node *n) {
297
  return (_node_bundling_limit > n->_idx);
298
}
299

300

301
// Redirect every use of n to nn. Each user is taken out of the initial GVN
// hash table while its inputs are rewritten, put back afterwards if it had
// been in the table, and recorded for IGVN.
void Compile::gvn_replace_by(Node* n, Node* nn) {
  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
    Node* user = n->last_out(i);
    const bool was_hashed = initial_gvn()->hash_delete(user);
    uint replaced = 0;
    for (uint slot = 0; slot < user->len(); slot++) {
      if (user->in(slot) != n) {
        continue;
      }
      // Required inputs and precedence edges use different setters.
      if (slot < user->req()) {
        user->set_req(slot, nn);
      } else {
        user->set_prec(slot, nn);
      }
      replaced++;
    }
    if (was_hashed) {
      // reinsert into table
      initial_gvn()->hash_find_insert(user);
    }
    record_for_igvn(user);
    i -= replaced;    // we deleted 1 or more copies of this edge
  }
}
323

324

325
static inline bool not_a_node(const Node* n) {
326
  if (n == NULL)                   return true;
327
  if (((intptr_t)n & 1) != 0)      return true;  // uninitialized, etc.
328
  if (*(address*)n == badAddress)  return true;  // kill by Node::destruct
329
  return false;
330
}
331

332
// Identify all nodes that are reachable from below, useful.
333
// Use breadth-first pass that records state in a Unique_Node_List,
334
// recursive traversal is slower.
335
void Compile::identify_useful_nodes(Unique_Node_List &useful) {
336
  int estimated_worklist_size = live_nodes();
337
  useful.map( estimated_worklist_size, NULL );  // preallocate space
338

339
  // Initialize worklist
340
  if (root() != NULL)     { useful.push(root()); }
341
  // If 'top' is cached, declare it useful to preserve cached node
342
  if( cached_top_node() ) { useful.push(cached_top_node()); }
343

344
  // Push all useful nodes onto the list, breadthfirst
345
  for( uint next = 0; next < useful.size(); ++next ) {
346
    assert( next < unique(), "Unique useful nodes < total nodes");
347
    Node *n  = useful.at(next);
348
    uint max = n->len();
349
    for( uint i = 0; i < max; ++i ) {
350
      Node *m = n->in(i);
351
      if (not_a_node(m))  continue;
352
      useful.push(m);
353
    }
354
  }
355
}
356

357
// Update dead_node_list with any missing dead nodes using useful
358
// list. Consider all non-useful nodes to be useless i.e., dead nodes.
359
void Compile::update_dead_node_list(Unique_Node_List &useful) {
360
  uint max_idx = unique();
361
  VectorSet& useful_node_set = useful.member_set();
362

363
  for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
364
    // If node with index node_idx is not in useful set,
365
    // mark it as dead in dead node list.
366
    if (! useful_node_set.test(node_idx) ) {
367
      record_dead_node(node_idx);
368
    }
369
  }
370
}
371

372
// Compact 'inlines' in place so that only call generators whose call node is
// a member of 'useful' remain, preserving their relative order.
void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
  int kept = 0;  // number of surviving entries == next write position
  for (int read = 0; read < inlines->length(); read++) {
    CallGenerator* cg = inlines->at(read);
    CallNode* call = cg->call_node();
    if (kept < read) {
      // Something was dropped earlier: slide this entry down.
      inlines->at_put(kept, cg);
    }
    if (useful.member(call)) {
      kept++;
    }
  }
  inlines->trunc_to(kept);
}
386

387
// Disconnect all useless nodes by disconnecting those at the boundary.
388
void Compile::remove_useless_nodes(Unique_Node_List &useful) {
389
  uint next = 0;
390
  while (next < useful.size()) {
391
    Node *n = useful.at(next++);
392
    if (n->is_SafePoint()) {
393
      // We're done with a parsing phase. Replaced nodes are not valid
394
      // beyond that point.
395
      n->as_SafePoint()->delete_replaced_nodes();
396
    }
397
    // Use raw traversal of out edges since this code removes out edges
398
    int max = n->outcnt();
399
    for (int j = 0; j < max; ++j) {
400
      Node* child = n->raw_out(j);
401
      if (! useful.member(child)) {
402
        assert(!child->is_top() || child != top(),
403
               "If top is cached in Compile object it is in useful list");
404
        // Only need to remove this out-edge to the useless node
405
        n->raw_del_out(j);
406
        --j;
407
        --max;
408
      }
409
    }
410
    if (n->outcnt() == 1 && n->has_special_unique_user()) {
411
      record_for_igvn(n->unique_out());
412
    }
413
    if (n->Opcode() == Op_AddP && CallLeafNode::has_only_g1_wb_pre_uses(n)) {
414
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
415
        record_for_igvn(n->fast_out(i));
416
      }
417
    }
418
  }
419
  // Remove useless macro and predicate opaq nodes
420
  for (int i = C->macro_count()-1; i >= 0; i--) {
421
    Node* n = C->macro_node(i);
422
    if (!useful.member(n)) {
423
      remove_macro_node(n);
424
    }
425
  }
426
  // Remove useless CastII nodes with range check dependency
427
  for (int i = range_check_cast_count() - 1; i >= 0; i--) {
428
    Node* cast = range_check_cast_node(i);
429
    if (!useful.member(cast)) {
430
      remove_range_check_cast(cast);
431
    }
432
  }
433
  // Remove useless expensive node
434
  for (int i = C->expensive_count()-1; i >= 0; i--) {
435
    Node* n = C->expensive_node(i);
436
    if (!useful.member(n)) {
437
      remove_expensive_node(n);
438
    }
439
  }
440
  for (int i = C->shenandoah_barriers_count()-1; i >= 0; i--) {
441
    ShenandoahLoadReferenceBarrierNode* n = C->shenandoah_barrier(i);
442
    if (!useful.member(n)) {
443
      remove_shenandoah_barrier(n);
444
    }
445
  }
446
  // clean up the late inline lists
447
  remove_useless_late_inlines(&_string_late_inlines, useful);
448
  remove_useless_late_inlines(&_boxing_late_inlines, useful);
449
  remove_useless_late_inlines(&_late_inlines, useful);
450
  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
451
}
452

453
//------------------------------frame_size_in_words-----------------------------
454
// frame_slots in units of words
455
int Compile::frame_size_in_words() const {
456
  // shift is 0 in LP32 and 1 in LP64
457
  const int shift = (LogBytesPerWord - LogBytesPerInt);
458
  int words = _frame_slots >> shift;
459
  assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
460
  return words;
461
}
462

463
// To bang the stack of this compiled method we use the stack size
464
// that the interpreter would need in case of a deoptimization. This
465
// removes the need to bang the stack in the deoptimization blob which
466
// in turn simplifies stack overflow handling.
467
int Compile::bang_size_in_bytes() const {
468
  return MAX2(_interpreter_frame_size, frame_size_in_bytes());
469
}
470

471
// ============================================================================
472
//------------------------------CompileWrapper---------------------------------
473
class CompileWrapper : public StackObj {
474
  Compile *const _compile;
475
 public:
476
  CompileWrapper(Compile* compile);
477

478
  ~CompileWrapper();
479
};
480

481
// Publish 'compile' as the compiler data of the current ciEnv, reset its
// type and index-set bookkeeping, initialize the type system for it and
// signal the start of the method compilation.
CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
  // the Compile* pointer is stored in the current ciEnv:
  ciEnv* const current_env = compile->env();
  assert(current_env == ciEnv::current(), "must already be a ciEnv active");
  assert(current_env->compiler_data() == NULL, "compile already active?");
  current_env->set_compiler_data(compile);
  assert(compile == Compile::current(), "sanity");

  // Start from a clean slate for the type and index-set state.
  compile->set_type_dict(NULL);
  compile->set_type_hwm(NULL);
  compile->set_type_last_size(0);
  compile->set_last_tf(NULL, NULL);
  compile->set_indexSet_arena(NULL);
  compile->set_indexSet_free_block_list(NULL);
  compile->init_type_arena();
  Type::Initialize(compile);

  // No scratch buffer yet; the destructor frees one if it gets allocated.
  compile->set_scratch_buffer_blob(NULL);
  compile->begin_method();
}
500
// Signal the end of the method compilation, release the scratch buffer blob
// if one was allocated, and detach the Compile object from its ciEnv.
CompileWrapper::~CompileWrapper() {
  _compile->end_method();
  BufferBlob* scratch = _compile->scratch_buffer_blob();
  if (scratch != NULL) {
    BufferBlob::free(scratch);
  }
  _compile->env()->set_compiler_data(NULL);
}
506

507

508
//----------------------------print_compile_messages---------------------------
509
void Compile::print_compile_messages() {
510
#ifndef PRODUCT
511
  // Check if recompiling
512
  if (_subsume_loads == false && PrintOpto) {
513
    // Recompiling without allowing machine instructions to subsume loads
514
    tty->print_cr("*********************************************************");
515
    tty->print_cr("** Bailout: Recompile without subsuming loads          **");
516
    tty->print_cr("*********************************************************");
517
  }
518
  if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
519
    // Recompiling without escape analysis
520
    tty->print_cr("*********************************************************");
521
    tty->print_cr("** Bailout: Recompile without escape analysis          **");
522
    tty->print_cr("*********************************************************");
523
  }
524
  if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {
525
    // Recompiling without boxing elimination
526
    tty->print_cr("*********************************************************");
527
    tty->print_cr("** Bailout: Recompile without boxing elimination       **");
528
    tty->print_cr("*********************************************************");
529
  }
530
  if (env()->break_at_compile()) {
531
    // Open the debugger when compiling this method.
532
    tty->print("### Breaking when compiling: ");
533
    method()->print_short_name();
534
    tty->cr();
535
    BREAKPOINT;
536
  }
537

538
  if( PrintOpto ) {
539
    if (is_osr_compilation()) {
540
      tty->print("[OSR]%3d", _compile_id);
541
    } else {
542
      tty->print("%3d", _compile_id);
543
    }
544
  }
545
#endif
546
}
547

548

549
//-----------------------init_scratch_buffer_blob------------------------------
550
// Construct a temporary BufferBlob and cache it for this compile.
551
void Compile::init_scratch_buffer_blob(int const_size) {
552
  // If there is already a scratch buffer blob allocated and the
553
  // constant section is big enough, use it.  Otherwise free the
554
  // current and allocate a new one.
555
  BufferBlob* blob = scratch_buffer_blob();
556
  if ((blob != NULL) && (const_size <= _scratch_const_size)) {
557
    // Use the current blob.
558
  } else {
559
    if (blob != NULL) {
560
      BufferBlob::free(blob);
561
    }
562

563
    ResourceMark rm;
564
    _scratch_const_size = const_size;
565
    int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
566
    blob = BufferBlob::create("Compile::scratch_buffer", size);
567
    // Record the buffer blob for next time.
568
    set_scratch_buffer_blob(blob);
569
    // Have we run out of code space?
570
    if (scratch_buffer_blob() == NULL) {
571
      // Let CompilerBroker disable further compilations.
572
      record_failure("Not enough space for scratch buffer in CodeCache");
573
      return;
574
    }
575
  }
576

577
  // Initialize the relocation buffers
578
  relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
579
  set_scratch_locs_memory(locs_buf);
580
}
581

582

583
//-----------------------scratch_emit_size-------------------------------------
584
// Helper function that computes size by emitting code
585
uint Compile::scratch_emit_size(const Node* n) {
586
  // Start scratch_emit_size section.
587
  set_in_scratch_emit_size(true);
588

589
  // Emit into a trash buffer and count bytes emitted.
590
  // This is a pretty expensive way to compute a size,
591
  // but it works well enough if seldom used.
592
  // All common fixed-size instructions are given a size
593
  // method by the AD file.
594
  // Note that the scratch buffer blob and locs memory are
595
  // allocated at the beginning of the compile task, and
596
  // may be shared by several calls to scratch_emit_size.
597
  // The allocation of the scratch buffer blob is particularly
598
  // expensive, since it has to grab the code cache lock.
599
  BufferBlob* blob = this->scratch_buffer_blob();
600
  assert(blob != NULL, "Initialize BufferBlob at start");
601
  assert(blob->size() > MAX_inst_size, "sanity");
602
  relocInfo* locs_buf = scratch_locs_memory();
603
  address blob_begin = blob->content_begin();
604
  address blob_end   = (address)locs_buf;
605
  assert(blob->content_contains(blob_end), "sanity");
606
  CodeBuffer buf(blob_begin, blob_end - blob_begin);
607
  buf.initialize_consts_size(_scratch_const_size);
608
  buf.initialize_stubs_size(MAX_stubs_size);
609
  assert(locs_buf != NULL, "sanity");
610
  int lsize = MAX_locs_size / 3;
611
  buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
612
  buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
613
  buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);
614

615
  // Do the emission.
616

617
  Label fakeL; // Fake label for branch instructions.
618
  Label*   saveL = NULL;
619
  uint save_bnum = 0;
620
  bool is_branch = n->is_MachBranch();
621
  if (is_branch) {
622
    MacroAssembler masm(&buf);
623
    masm.bind(fakeL);
624
    n->as_MachBranch()->save_label(&saveL, &save_bnum);
625
    n->as_MachBranch()->label_set(&fakeL, 0);
626
  }
627
  n->emit(buf, this->regalloc());
628

629
  // Emitting into the scratch buffer should not fail
630
  assert (!failing(), err_msg_res("Must not have pending failure. Reason is: %s", failure_reason()));
631

632
  if (is_branch) // Restore label.
633
    n->as_MachBranch()->label_set(saveL, save_bnum);
634

635
  // End scratch_emit_size section.
636
  set_in_scratch_emit_size(false);
637

638
  return buf.insts_size();
639
}
640

641

642
// ============================================================================
643
//------------------------------Compile standard-------------------------------
644
// Definition of the static member Compile::_debug_idx, initialized to 100000.
// It is wrapped in debug_only, so presumably it exists in debug builds only
// (NOTE(review): confirm against the debug_only macro definition).
debug_only( int Compile::_debug_idx = 100000; )
645

646
// Compile a method.  entry_bci is -1 for normal compilations and indicates
647
// the continuation bci for on stack replacement.
648

649

650
Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
651
                  bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
652
                : Phase(Compiler),
653
                  _env(ci_env),
654
                  _log(ci_env->log()),
655
                  _compile_id(ci_env->compile_id()),
656
                  _save_argument_registers(false),
657
                  _stub_name(NULL),
658
                  _stub_function(NULL),
659
                  _stub_entry_point(NULL),
660
                  _method(target),
661
                  _entry_bci(osr_bci),
662
                  _initial_gvn(NULL),
663
                  _for_igvn(NULL),
664
                  _warm_calls(NULL),
665
                  _subsume_loads(subsume_loads),
666
                  _do_escape_analysis(do_escape_analysis),
667
                  _eliminate_boxing(eliminate_boxing),
668
                  _failure_reason(NULL),
669
                  _code_buffer("Compile::Fill_buffer"),
670
                  _orig_pc_slot(0),
671
                  _orig_pc_slot_offset_in_bytes(0),
672
                  _has_method_handle_invokes(false),
673
                  _mach_constant_base_node(NULL),
674
                  _node_bundling_limit(0),
675
                  _node_bundling_base(NULL),
676
                  _java_calls(0),
677
                  _inner_loops(0),
678
                  _scratch_const_size(-1),
679
                  _in_scratch_emit_size(false),
680
                  _dead_node_list(comp_arena()),
681
                  _dead_node_count(0),
682
#ifndef PRODUCT
683
                  _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
684
                  _in_dump_cnt(0),
685
                  _printer(IdealGraphPrinter::printer()),
686
#endif
687
                  _congraph(NULL),
688
                  _comp_arena(mtCompiler),
689
                  _node_arena(mtCompiler),
690
                  _old_arena(mtCompiler),
691
                  _Compile_types(mtCompiler),
692
                  _replay_inline_data(NULL),
693
                  _late_inlines(comp_arena(), 2, 0, NULL),
694
                  _string_late_inlines(comp_arena(), 2, 0, NULL),
695
                  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
696
                  _late_inlines_pos(0),
697
                  _number_of_mh_late_inlines(0),
698
                  _inlining_progress(false),
699
                  _inlining_incrementally(false),
700
                  _print_inlining_list(NULL),
701
                  _print_inlining_idx(0),
702
                  _interpreter_frame_size(0),
703
                  _max_node_limit(MaxNodeLimit) {
704
  C = this;
705

706
  CompileWrapper cw(this);
707
#ifndef PRODUCT
708
  if (TimeCompiler2) {
709
    tty->print(" ");
710
    target->holder()->name()->print();
711
    tty->print(".");
712
    target->print_short_name();
713
    tty->print("  ");
714
  }
715
  TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
716
  TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
717
  bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
718
  if (!print_opto_assembly) {
719
    bool print_assembly = (PrintAssembly || _method->should_print_assembly());
720
    if (print_assembly && !Disassembler::can_decode()) {
721
      tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
722
      print_opto_assembly = true;
723
    }
724
  }
725
  set_print_assembly(print_opto_assembly);
726
  set_parsed_irreducible_loop(false);
727

728
  if (method()->has_option("ReplayInline")) {
729
    _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
730
  }
731
#endif
732
  set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
733
  set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
734
  set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
735

736
  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
737
    // Make sure the method being compiled gets its own MDO,
738
    // so we can at least track the decompile_count().
739
    // Need MDO to record RTM code generation state.
740
    method()->ensure_method_data();
741
  }
742

743
  Init(::AliasLevel);
744

745

746
  print_compile_messages();
747

748
  _ilt = InlineTree::build_inline_tree_root();
749

750
  // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
751
  assert(num_alias_types() >= AliasIdxRaw, "");
752

753
#define MINIMUM_NODE_HASH  1023
754
  // Node list that Iterative GVN will start with
755
  Unique_Node_List for_igvn(comp_arena());
756
  set_for_igvn(&for_igvn);
757

758
  // GVN that will be run immediately on new nodes
759
  uint estimated_size = method()->code_size()*4+64;
760
  estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
761
  PhaseGVN gvn(node_arena(), estimated_size);
762
  set_initial_gvn(&gvn);
763

764
  if (print_inlining() || print_intrinsics()) {
765
    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
766
  }
767
  { // Scope for timing the parser
768
    TracePhase t3("parse", &_t_parser, true);
769

770
    // Put top into the hash table ASAP.
771
    initial_gvn()->transform_no_reclaim(top());
772

773
    // Set up tf(), start(), and find a CallGenerator.
774
    CallGenerator* cg = NULL;
775
    if (is_osr_compilation()) {
776
      const TypeTuple *domain = StartOSRNode::osr_domain();
777
      const TypeTuple *range = TypeTuple::make_range(method()->signature());
778
      init_tf(TypeFunc::make(domain, range));
779
      StartNode* s = new (this) StartOSRNode(root(), domain);
780
      initial_gvn()->set_type_bottom(s);
781
      init_start(s);
782
      cg = CallGenerator::for_osr(method(), entry_bci());
783
    } else {
784
      // Normal case.
785
      init_tf(TypeFunc::make(method()));
786
      StartNode* s = new (this) StartNode(root(), tf()->domain());
787
      initial_gvn()->set_type_bottom(s);
788
      init_start(s);
789
      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && (UseG1GC || UseShenandoahGC)) {
790
        // With java.lang.ref.reference.get() we must go through the
791
        // intrinsic when G1 is enabled - even when get() is the root
792
        // method of the compile - so that, if necessary, the value in
793
        // the referent field of the reference object gets recorded by
794
        // the pre-barrier code.
795
        // Specifically, if G1 is enabled, the value in the referent
796
        // field is recorded by the G1 SATB pre barrier. This will
797
        // result in the referent being marked live and the reference
798
        // object removed from the list of discovered references during
799
        // reference processing.
800
        cg = find_intrinsic(method(), false);
801
      }
802
      if (cg == NULL) {
803
        float past_uses = method()->interpreter_invocation_count();
804
        float expected_uses = past_uses;
805
        cg = CallGenerator::for_inline(method(), expected_uses);
806
      }
807
    }
808
    if (failing())  return;
809
    if (cg == NULL) {
810
      record_method_not_compilable_all_tiers("cannot parse method");
811
      return;
812
    }
813
    JVMState* jvms = build_start_state(start(), tf());
814
    if ((jvms = cg->generate(jvms)) == NULL) {
815
      if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {
816
        record_method_not_compilable("method parse failed");
817
      }
818
      return;
819
    }
820
    GraphKit kit(jvms);
821

822
    if (!kit.stopped()) {
823
      // Accept return values, and transfer control we know not where.
824
      // This is done by a special, unique ReturnNode bound to root.
825
      return_values(kit.jvms());
826
    }
827

828
    if (kit.has_exceptions()) {
829
      // Any exceptions that escape from this call must be rethrown
830
      // to whatever caller is dynamically above us on the stack.
831
      // This is done by a special, unique RethrowNode bound to root.
832
      rethrow_exceptions(kit.transfer_exceptions_into_jvms());
833
    }
834

835
    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
836

837
    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
838
      inline_string_calls(true);
839
    }
840

841
    if (failing())  return;
842

843
    print_method(PHASE_BEFORE_REMOVEUSELESS, 3);
844

845
    // Remove clutter produced by parsing.
846
    if (!failing()) {
847
      ResourceMark rm;
848
      PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
849
    }
850
  }
851

852
  // Note:  Large methods are capped off in do_one_bytecode().
853
  if (failing())  return;
854

855
  // After parsing, node notes are no longer automagic.
856
  // They must be propagated by register_new_node_with_optimizer(),
857
  // clone(), or the like.
858
  set_default_node_notes(NULL);
859

860
  for (;;) {
861
    int successes = Inline_Warm();
862
    if (failing())  return;
863
    if (successes == 0)  break;
864
  }
865

866
  // Drain the list.
867
  Finish_Warm();
868
#ifndef PRODUCT
869
  if (_printer) {
870
    _printer->print_inlining(this);
871
  }
872
#endif
873

874
  if (failing())  return;
875
  NOT_PRODUCT( verify_graph_edges(); )
876

877
  // Now optimize
878
  Optimize();
879
  if (failing())  return;
880
  NOT_PRODUCT( verify_graph_edges(); )
881

882
#ifndef PRODUCT
883
  if (PrintIdeal) {
884
    ttyLocker ttyl;  // keep the following output all in one block
885
    // This output goes directly to the tty, not the compiler log.
886
    // To enable tools to match it up with the compilation activity,
887
    // be sure to tag this tty output with the compile ID.
888
    if (xtty != NULL) {
889
      xtty->head("ideal compile_id='%d'%s", compile_id(),
890
                 is_osr_compilation()    ? " compile_kind='osr'" :
891
                 "");
892
    }
893
    root()->dump(9999);
894
    if (xtty != NULL) {
895
      xtty->tail("ideal");
896
    }
897
  }
898
#endif
899

900
  NOT_PRODUCT( verify_barriers(); )
901

902
  // Dump compilation data to replay it.
903
  if (method()->has_option("DumpReplay")) {
904
    env()->dump_replay_data(_compile_id);
905
  }
906
  if (method()->has_option("DumpInline") && (ilt() != NULL)) {
907
    env()->dump_inline_data(_compile_id);
908
  }
909

910
  // Now that we know the size of all the monitors we can add a fixed slot
911
  // for the original deopt pc.
912

913
  _orig_pc_slot =  fixed_slots();
914
  int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
915
  set_fixed_slots(next_slot);
916

917
  // Compute when to use implicit null checks. Used by matching trap based
918
  // nodes and NullCheck optimization.
919
  set_allowed_deopt_reasons();
920

921
  // Now generate code
922
  Code_Gen();
923
  if (failing())  return;
924

925
  // Check if we want to skip execution of all compiled code.
926
  {
927
#ifndef PRODUCT
928
    if (OptoNoExecute) {
929
      record_method_not_compilable("+OptoNoExecute");  // Flag as failed
930
      return;
931
    }
932
    TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
933
#endif
934

935
    if (is_osr_compilation()) {
936
      _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
937
      _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
938
    } else {
939
      _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
940
      _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
941
    }
942

943
    env()->register_method(_method, _entry_bci,
944
                           &_code_offsets,
945
                           _orig_pc_slot_offset_in_bytes,
946
                           code_buffer(),
947
                           frame_size_in_words(), _oop_map_set,
948
                           &_handler_table, &_inc_table,
949
                           compiler,
950
                           env()->comp_level(),
951
                           has_unsafe_access(),
952
                           SharedRuntime::is_wide_vector(max_vector_size()),
953
                           rtm_state()
954
                           );
955

956
    if (log() != NULL) // Print code cache state into compiler log
957
      log()->code_cache_state();
958
  }
959
}
960

961
//------------------------------Compile----------------------------------------
962
// Compile a runtime stub
963
Compile::Compile( ciEnv* ci_env,
964
                  TypeFunc_generator generator,
965
                  address stub_function,
966
                  const char *stub_name,
967
                  int is_fancy_jump,
968
                  bool pass_tls,
969
                  bool save_arg_registers,
970
                  bool return_pc )
971
  : Phase(Compiler),
972
    _env(ci_env),
973
    _log(ci_env->log()),
974
    _compile_id(0),
975
    _save_argument_registers(save_arg_registers),
976
    _method(NULL),
977
    _stub_name(stub_name),
978
    _stub_function(stub_function),
979
    _stub_entry_point(NULL),
980
    _entry_bci(InvocationEntryBci),
981
    _initial_gvn(NULL),
982
    _for_igvn(NULL),
983
    _warm_calls(NULL),
984
    _orig_pc_slot(0),
985
    _orig_pc_slot_offset_in_bytes(0),
986
    _subsume_loads(true),
987
    _do_escape_analysis(false),
988
    _eliminate_boxing(false),
989
    _failure_reason(NULL),
990
    _code_buffer("Compile::Fill_buffer"),
991
    _has_method_handle_invokes(false),
992
    _mach_constant_base_node(NULL),
993
    _node_bundling_limit(0),
994
    _node_bundling_base(NULL),
995
    _java_calls(0),
996
    _inner_loops(0),
997
#ifndef PRODUCT
998
    _trace_opto_output(TraceOptoOutput),
999
    _in_dump_cnt(0),
1000
    _printer(NULL),
1001
#endif
1002
    _comp_arena(mtCompiler),
1003
    _node_arena(mtCompiler),
1004
    _old_arena(mtCompiler),
1005
    _Compile_types(mtCompiler),
1006
    _dead_node_list(comp_arena()),
1007
    _dead_node_count(0),
1008
    _congraph(NULL),
1009
    _replay_inline_data(NULL),
1010
    _number_of_mh_late_inlines(0),
1011
    _inlining_progress(false),
1012
    _inlining_incrementally(false),
1013
    _print_inlining_list(NULL),
1014
    _print_inlining_idx(0),
1015
    _allowed_reasons(0),
1016
    _interpreter_frame_size(0),
1017
    _max_node_limit(MaxNodeLimit) {
1018
  C = this;
1019

1020
#ifndef PRODUCT
1021
  TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
1022
  TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
1023
  set_print_assembly(PrintFrameConverterAssembly);
1024
  set_parsed_irreducible_loop(false);
1025
#endif
1026
  set_has_irreducible_loop(false); // no loops
1027

1028
  CompileWrapper cw(this);
1029
  Init(/*AliasLevel=*/ 0);
1030
  init_tf((*generator)());
1031

1032
  {
1033
    // The following is a dummy for the sake of GraphKit::gen_stub
1034
    Unique_Node_List for_igvn(comp_arena());
1035
    set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
1036
    PhaseGVN gvn(Thread::current()->resource_area(),255);
1037
    set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
1038
    gvn.transform_no_reclaim(top());
1039

1040
    GraphKit kit;
1041
    kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
1042
  }
1043

1044
  NOT_PRODUCT( verify_graph_edges(); )
1045
  Code_Gen();
1046
  if (failing())  return;
1047

1048

1049
  // Entry point will be accessed using compile->stub_entry_point();
1050
  if (code_buffer() == NULL) {
1051
    Matcher::soft_match_failure();
1052
  } else {
1053
    if (PrintAssembly && (WizardMode || Verbose))
1054
      tty->print_cr("### Stub::%s", stub_name);
1055

1056
    if (!failing()) {
1057
      assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
1058

1059
      // Make the NMethod
1060
      // For now we mark the frame as never safe for profile stackwalking
1061
      RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
1062
                                                      code_buffer(),
1063
                                                      CodeOffsets::frame_never_safe,
1064
                                                      // _code_offsets.value(CodeOffsets::Frame_Complete),
1065
                                                      frame_size_in_words(),
1066
                                                      _oop_map_set,
1067
                                                      save_arg_registers);
1068
      assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
1069

1070
      _stub_entry_point = rs->entry_point();
1071
    }
1072
  }
1073
}
1074

1075
//------------------------------Init-------------------------------------------
1076
// Prepare for a single compilation
1077
void Compile::Init(int aliaslevel) {
1078
  _unique  = 0;
1079
  _regalloc = NULL;
1080

1081
  _tf      = NULL;  // filled in later
1082
  _top     = NULL;  // cached later
1083
  _matcher = NULL;  // filled in later
1084
  _cfg     = NULL;  // filled in later
1085

1086
  set_24_bit_selection_and_mode(Use24BitFP, false);
1087

1088
  _node_note_array = NULL;
1089
  _default_node_notes = NULL;
1090

1091
  _immutable_memory = NULL; // filled in at first inquiry
1092

1093
  // Globally visible Nodes
1094
  // First set TOP to NULL to give safe behavior during creation of RootNode
1095
  set_cached_top_node(NULL);
1096
  set_root(new (this) RootNode());
1097
  // Now that you have a Root to point to, create the real TOP
1098
  set_cached_top_node( new (this) ConNode(Type::TOP) );
1099
  set_recent_alloc(NULL, NULL);
1100

1101
  // Create Debug Information Recorder to record scopes, oopmaps, etc.
1102
  env()->set_oop_recorder(new OopRecorder(env()->arena()));
1103
  env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
1104
  env()->set_dependencies(new Dependencies(env()));
1105

1106
  _fixed_slots = 0;
1107
  set_has_split_ifs(false);
1108
  set_has_loops(has_method() && method()->has_loops()); // first approximation
1109
  set_has_stringbuilder(false);
1110
  set_has_boxed_value(false);
1111
  _trap_can_recompile = false;  // no traps emitted yet
1112
  _major_progress = true; // start out assuming good things will happen
1113
  set_has_unsafe_access(false);
1114
  set_max_vector_size(0);
1115
  Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
1116
  set_decompile_count(0);
1117

1118
  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
1119
  set_num_loop_opts(LoopOptsCount);
1120
  set_do_inlining(Inline);
1121
  set_max_inline_size(MaxInlineSize);
1122
  set_freq_inline_size(FreqInlineSize);
1123
  set_do_scheduling(OptoScheduling);
1124
  set_do_count_invocations(false);
1125
  set_do_method_data_update(false);
1126
  set_rtm_state(NoRTM); // No RTM lock eliding by default
1127
  method_has_option_value("MaxNodeLimit", _max_node_limit);
1128
#if INCLUDE_RTM_OPT
1129
  if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
1130
    int rtm_state = method()->method_data()->rtm_state();
1131
    if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
1132
      // Don't generate RTM lock eliding code.
1133
      set_rtm_state(NoRTM);
1134
    } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
1135
      // Generate RTM lock eliding code without abort ratio calculation code.
1136
      set_rtm_state(UseRTM);
1137
    } else if (UseRTMDeopt) {
1138
      // Generate RTM lock eliding code and include abort ratio calculation
1139
      // code if UseRTMDeopt is on.
1140
      set_rtm_state(ProfileRTM);
1141
    }
1142
  }
1143
#endif
1144
  if (debug_info()->recording_non_safepoints()) {
1145
    set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
1146
                        (comp_arena(), 8, 0, NULL));
1147
    set_default_node_notes(Node_Notes::make(this));
1148
  }
1149

1150
  // // -- Initialize types before each compile --
1151
  // // Update cached type information
1152
  // if( _method && _method->constants() )
1153
  //   Type::update_loaded_types(_method, _method->constants());
1154

1155
  // Init alias_type map.
1156
  if (!_do_escape_analysis && aliaslevel == 3)
1157
    aliaslevel = 2;  // No unique types without escape analysis
1158
  _AliasLevel = aliaslevel;
1159
  const int grow_ats = 16;
1160
  _max_alias_types = grow_ats;
1161
  _alias_types   = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
1162
  AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType,  grow_ats);
1163
  Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
1164
  {
1165
    for (int i = 0; i < grow_ats; i++)  _alias_types[i] = &ats[i];
1166
  }
1167
  // Initialize the first few types.
1168
  _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
1169
  _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
1170
  _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
1171
  _num_alias_types = AliasIdxRaw+1;
1172
  // Zero out the alias type cache.
1173
  Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
1174
  // A NULL adr_type hits in the cache right away.  Preload the right answer.
1175
  probe_alias_cache(NULL)->_index = AliasIdxTop;
1176

1177
  _intrinsics = NULL;
1178
  _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1179
  _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1180
  _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1181
  _range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1182
  _shenandoah_barriers = new(comp_arena()) GrowableArray<ShenandoahLoadReferenceBarrierNode*>(comp_arena(), 8,  0, NULL);
1183
  register_library_intrinsics();
1184
#ifdef ASSERT
1185
  _type_verify_symmetry = true;
1186
#endif
1187
}
1188

1189
//---------------------------init_start----------------------------------------
1190
// Install the StartNode on this compile object.
1191
void Compile::init_start(StartNode* s) {
1192
  if (failing())
1193
    return; // already failing
1194
  assert(s == start(), "");
1195
}
1196

1197
// Return the first user of root() that is a Start node.
// Must not be called on a failing compilation; calls fatal() when root has
// no Start user.
StartNode* Compile::start() const {
  assert(!failing(), "");
  for (DUIterator_Fast limit, pos = root()->fast_outs(limit); pos < limit; pos++) {
    Node* user = root()->fast_out(pos);
    if (!user->is_Start()) {
      continue;
    }
    return user->as_Start();
  }
  fatal("Did not find Start node!");
  return NULL;
}
1207

1208
//-------------------------------immutable_memory-------------------------------------
1209
// Access immutable memory
1210
Node* Compile::immutable_memory() {
1211
  if (_immutable_memory != NULL) {
1212
    return _immutable_memory;
1213
  }
1214
  StartNode* s = start();
1215
  for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
1216
    Node *p = s->fast_out(i);
1217
    if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
1218
      _immutable_memory = p;
1219
      return _immutable_memory;
1220
    }
1221
  }
1222
  ShouldNotReachHere();
1223
  return NULL;
1224
}
1225

1226
//----------------------set_cached_top_node------------------------------------
1227
// Install the cached top node, and make sure Node::is_top works correctly.
1228
void Compile::set_cached_top_node(Node* tn) {
1229
  if (tn != NULL)  verify_top(tn);
1230
  Node* old_top = _top;
1231
  _top = tn;
1232
  // Calling Node::setup_is_top allows the nodes the chance to adjust
1233
  // their _out arrays.
1234
  if (_top != NULL)     _top->setup_is_top();
1235
  if (old_top != NULL)  old_top->setup_is_top();
1236
  assert(_top == NULL || top()->is_top(), "");
1237
}
1238

1239
#ifdef ASSERT
1240
// Count the nodes that identify_useful_nodes() collects by walking the graph.
uint Compile::count_live_nodes_by_graph_walk() {
  Unique_Node_List reachable(comp_arena());
  identify_useful_nodes(reachable);
  const uint live_by_walk = reachable.size();
  return live_by_walk;
}
1246

1247
void Compile::print_missing_nodes() {
1248

1249
  // Return if CompileLog is NULL and PrintIdealNodeCount is false.
1250
  if ((_log == NULL) && (! PrintIdealNodeCount)) {
1251
    return;
1252
  }
1253

1254
  // This is an expensive function. It is executed only when the user
1255
  // specifies VerifyIdealNodeCount option or otherwise knows the
1256
  // additional work that needs to be done to identify reachable nodes
1257
  // by walking the flow graph and find the missing ones using
1258
  // _dead_node_list.
1259

1260
  Unique_Node_List useful(comp_arena());
1261
  // Get useful node list by walking the graph.
1262
  identify_useful_nodes(useful);
1263

1264
  uint l_nodes = C->live_nodes();
1265
  uint l_nodes_by_walk = useful.size();
1266

1267
  if (l_nodes != l_nodes_by_walk) {
1268
    if (_log != NULL) {
1269
      _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));
1270
      _log->stamp();
1271
      _log->end_head();
1272
    }
1273
    VectorSet& useful_member_set = useful.member_set();
1274
    int last_idx = l_nodes_by_walk;
1275
    for (int i = 0; i < last_idx; i++) {
1276
      if (useful_member_set.test(i)) {
1277
        if (_dead_node_list.test(i)) {
1278
          if (_log != NULL) {
1279
            _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);
1280
          }
1281
          if (PrintIdealNodeCount) {
1282
            // Print the log message to tty
1283
              tty->print_cr("mismatched_node idx='%d' both live and dead'", i);
1284
              useful.at(i)->dump();
1285
          }
1286
        }
1287
      }
1288
      else if (! _dead_node_list.test(i)) {
1289
        if (_log != NULL) {
1290
          _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);
1291
        }
1292
        if (PrintIdealNodeCount) {
1293
          // Print the log message to tty
1294
          tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);
1295
        }
1296
      }
1297
    }
1298
    if (_log != NULL) {
1299
      _log->tail("mismatched_nodes");
1300
    }
1301
  }
1302
}
1303
#endif
1304

1305
#ifndef PRODUCT
1306
// Assert that 'tn', when non-NULL, is a constant node of type Type::TOP whose
// in(0) is set.  A NULL 'tn' is accepted without any check.
void Compile::verify_top(Node* tn) const {
  if (tn == NULL) {
    return;
  }
  assert(tn->is_Con(), "top node must be a constant");
  assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
  assert(tn->in(0) != NULL, "must have live top node");
}
1313
#endif
1314

1315

1316
///-------------------Managing Per-Node Debug & Profile Info-------------------
1317

1318
// Append zero-filled blocks of Node_Notes to 'arr'.
//   arr     - array of block pointers to extend; must not be NULL
//   grow_by - requested number of blocks; raised to the current length of
//             'arr' when smaller, so the array at least doubles
// Each block holds _node_notes_block_size entries and all new blocks come
// from a single node_arena() allocation.
void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
  guarantee(arr != NULL, "");

  const int existing_blocks = arr->length();
  const int blocks_to_add = (grow_by < existing_blocks) ? existing_blocks : grow_by;

  int remaining = blocks_to_add * _node_notes_block_size;
  Node_Notes* cursor = NEW_ARENA_ARRAY(node_arena(), Node_Notes, remaining);
  Copy::zero_to_bytes(cursor, remaining * sizeof(Node_Notes));

  // Hand out the allocation one block at a time.
  for (; remaining > 0; remaining -= _node_notes_block_size) {
    arr->append(cursor);
    cursor += _node_notes_block_size;
  }
  assert(remaining == 0, "exact multiple, please");
}
1332

1333
// Copy the node notes of 'source' onto the already existing node 'dest'.
// Returns false without copying when either node is NULL, when 'dest' is a
// constant, when no notes are being collected, or when 'source' has no
// (non-clear) notes; otherwise returns the result of set_node_notes_at().
// Notes already present on 'dest' take precedence over those of 'source'.
bool Compile::copy_node_notes_to(Node* dest, Node* source) {
  if (dest == NULL || source == NULL) {
    return false;
  }

  // Debug info is never pushed onto constants.
  if (dest->is_Con()) {
    return false;
  }

#ifdef ASSERT
  // Leave a bread crumb on dest that points back to the original node
  // (dest is known to be non-NULL here).
  if (dest != source && dest->debug_orig() == NULL) {
    dest->set_debug_orig(source);
  }
#endif

  // No note array means notes are not being collected right now.
  if (node_note_array() == NULL) {
    return false;
  }

  Node_Notes* from = node_notes_at(source->_idx);
  if (from == NULL || from->is_clear()) {
    return false;
  }

  Node_Notes* onto = node_notes_at(dest->_idx);
  const bool dest_has_notes = (onto != NULL) && !onto->is_clear();
  if (!dest_has_notes) {
    return set_node_notes_at(dest->_idx, from);
  }

  // Start from the source notes and then update from dest, so that the
  // notes dest already had win.
  Node_Notes combined = *from;
  combined.update_from(onto);
  return set_node_notes_at(dest->_idx, &combined);
}
1363

1364

1365
//--------------------------allow_range_check_smearing-------------------------
1366
// Gating condition for coalescing similar range checks.
1367
// Sometimes we try 'speculatively' replacing a series of a range checks by a
1368
// single covering check that is at least as strong as any of them.
1369
// If the optimization succeeds, the simplified (strengthened) range check
1370
// will always succeed.  If it fails, we will deopt, and then give up
1371
// on the optimization.
1372
bool Compile::allow_range_check_smearing() const {
1373
  // If this method has already thrown a range-check,
1374
  // assume it was because we already tried range smearing
1375
  // and it failed.
1376
  uint already_trapped = trap_count(Deoptimization::Reason_range_check);
1377
  return !already_trapped;
1378
}
1379

1380

1381
//------------------------------flatten_alias_type-----------------------------
1382
const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
1383
  int offset = tj->offset();
1384
  TypePtr::PTR ptr = tj->ptr();
1385

1386
  // Known instance (scalarizable allocation) alias only with itself.
1387
  bool is_known_inst = tj->isa_oopptr() != NULL &&
1388
                       tj->is_oopptr()->is_known_instance();
1389

1390
  // Process weird unsafe references.
1391
  if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
1392
    assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
1393
    assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
1394
    tj = TypeOopPtr::BOTTOM;
1395
    ptr = tj->ptr();
1396
    offset = tj->offset();
1397
  }
1398

1399
  // Array pointers need some flattening
1400
  const TypeAryPtr *ta = tj->isa_aryptr();
1401
  if (ta && ta->is_stable()) {
1402
    // Erase stability property for alias analysis.
1403
    tj = ta = ta->cast_to_stable(false);
1404
  }
1405
  if( ta && is_known_inst ) {
1406
    if ( offset != Type::OffsetBot &&
1407
         offset > arrayOopDesc::length_offset_in_bytes() ) {
1408
      offset = Type::OffsetBot; // Flatten constant access into array body only
1409
      tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
1410
    }
1411
  } else if( ta && _AliasLevel >= 2 ) {
1412
    // For arrays indexed by constant indices, we flatten the alias
1413
    // space to include all of the array body.  Only the header, klass
1414
    // and array length can be accessed un-aliased.
1415
    if( offset != Type::OffsetBot ) {
1416
      if( ta->const_oop() ) { // MethodData* or Method*
1417
        offset = Type::OffsetBot;   // Flatten constant access into array body
1418
        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
1419
      } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
1420
        // range is OK as-is.
1421
        tj = ta = TypeAryPtr::RANGE;
1422
      } else if( offset == oopDesc::klass_offset_in_bytes() ) {
1423
        tj = TypeInstPtr::KLASS; // all klass loads look alike
1424
        ta = TypeAryPtr::RANGE; // generic ignored junk
1425
        ptr = TypePtr::BotPTR;
1426
      } else if( offset == oopDesc::mark_offset_in_bytes() ) {
1427
        tj = TypeInstPtr::MARK;
1428
        ta = TypeAryPtr::RANGE; // generic ignored junk
1429
        ptr = TypePtr::BotPTR;
1430
      } else {                  // Random constant offset into array body
1431
        offset = Type::OffsetBot;   // Flatten constant access into array body
1432
        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
1433
      }
1434
    }
1435
    // Arrays of fixed size alias with arrays of unknown size.
1436
    if (ta->size() != TypeInt::POS) {
1437
      const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
1438
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
1439
    }
1440
    // Arrays of known objects become arrays of unknown objects.
1441
    if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
1442
      const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
1443
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1444
    }
1445
    if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
1446
      const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
1447
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1448
    }
1449
    // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
1450
    // cannot be distinguished by bytecode alone.
1451
    if (ta->elem() == TypeInt::BOOL) {
1452
      const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
1453
      ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
1454
      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
1455
    }
1456
    // During the 2nd round of IterGVN, NotNull castings are removed.
1457
    // Make sure the Bottom and NotNull variants alias the same.
1458
    // Also, make sure exact and non-exact variants alias the same.
1459
    if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {
1460
      tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
1461
    }
1462
  }
1463

1464
  // Oop pointers need some flattening
1465
  const TypeInstPtr *to = tj->isa_instptr();
1466
  if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
1467
    ciInstanceKlass *k = to->klass()->as_instance_klass();
1468
    if( ptr == TypePtr::Constant ) {
1469
      if (to->klass() != ciEnv::current()->Class_klass() ||
1470
          offset < k->size_helper() * wordSize) {
1471
        // No constant oop pointers (such as Strings); they alias with
1472
        // unknown strings.
1473
        assert(!is_known_inst, "not scalarizable allocation");
1474
        tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1475
      }
1476
    } else if( is_known_inst ) {
1477
      tj = to; // Keep NotNull and klass_is_exact for instance type
1478
    } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
1479
      // During the 2nd round of IterGVN, NotNull castings are removed.
1480
      // Make sure the Bottom and NotNull variants alias the same.
1481
      // Also, make sure exact and non-exact variants alias the same.
1482
      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1483
    }
1484
    if (to->speculative() != NULL) {
1485
      tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());
1486
    }
1487
    // Canonicalize the holder of this field
1488
    if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
1489
      // First handle header references such as a LoadKlassNode, even if the
1490
      // object's klass is unloaded at compile time (4965979).
1491
      if (!is_known_inst) { // Do it only for non-instance types
1492
        tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
1493
      }
1494
    } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
1495
      // Static fields are in the space above the normal instance
1496
      // fields in the java.lang.Class instance.
1497
      if (to->klass() != ciEnv::current()->Class_klass()) {
1498
        to = NULL;
1499
        tj = TypeOopPtr::BOTTOM;
1500
        offset = tj->offset();
1501
      }
1502
    } else {
1503
      ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
1504
      if (!k->equals(canonical_holder) || tj->offset() != offset) {
1505
        if( is_known_inst ) {
1506
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
1507
        } else {
1508
          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
1509
        }
1510
      }
1511
    }
1512
  }
1513

1514
  // Klass pointers to object array klasses need some flattening
1515
  const TypeKlassPtr *tk = tj->isa_klassptr();
1516
  if( tk ) {
1517
    // If we are referencing a field within a Klass, we need
1518
    // to assume the worst case of an Object.  Both exact and
1519
    // inexact types must flatten to the same alias class so
1520
    // use NotNull as the PTR.
1521
    if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
1522

1523
      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
1524
                                   TypeKlassPtr::OBJECT->klass(),
1525
                                   offset);
1526
    }
1527

1528
    ciKlass* klass = tk->klass();
1529
    if( klass->is_obj_array_klass() ) {
1530
      ciKlass* k = TypeAryPtr::OOPS->klass();
1531
      if( !k || !k->is_loaded() )                  // Only fails for some -Xcomp runs
1532
        k = TypeInstPtr::BOTTOM->klass();
1533
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
1534
    }
1535

1536
    // Check for precise loads from the primary supertype array and force them
1537
    // to the supertype cache alias index.  Check for generic array loads from
1538
    // the primary supertype array and also force them to the supertype cache
1539
    // alias index.  Since the same load can reach both, we need to merge
1540
    // these 2 disparate memories into the same alias class.  Since the
1541
    // primary supertype array is read-only, there's no chance of confusion
1542
    // where we bypass an array load and an array store.
1543
    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
1544
    if (offset == Type::OffsetBot ||
1545
        (offset >= primary_supers_offset &&
1546
         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
1547
        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
1548
      offset = in_bytes(Klass::secondary_super_cache_offset());
1549
      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
1550
    }
1551
  }
1552

1553
  // Flatten all Raw pointers together.
1554
  if (tj->base() == Type::RawPtr)
1555
    tj = TypeRawPtr::BOTTOM;
1556

1557
  if (tj->base() == Type::AnyPtr)
1558
    tj = TypePtr::BOTTOM;      // An error, which the caller must check for.
1559

1560
  // Flatten all to bottom for now
1561
  switch( _AliasLevel ) {
1562
  case 0:
1563
    tj = TypePtr::BOTTOM;
1564
    break;
1565
  case 1:                       // Flatten to: oop, static, field or array
1566
    switch (tj->base()) {
1567
    //case Type::AryPtr: tj = TypeAryPtr::RANGE;    break;
1568
    case Type::RawPtr:   tj = TypeRawPtr::BOTTOM;   break;
1569
    case Type::AryPtr:   // do not distinguish arrays at all
1570
    case Type::InstPtr:  tj = TypeInstPtr::BOTTOM;  break;
1571
    case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
1572
    case Type::AnyPtr:   tj = TypePtr::BOTTOM;      break;  // caller checks it
1573
    default: ShouldNotReachHere();
1574
    }
1575
    break;
1576
  case 2:                       // No collapsing at level 2; keep all splits
1577
  case 3:                       // No collapsing at level 3; keep all splits
1578
    break;
1579
  default:
1580
    Unimplemented();
1581
  }
1582

1583
  offset = tj->offset();
1584
  assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
1585

1586
  assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
1587
          (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
1588
          (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
1589
          (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
1590
          (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1591
          (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1592
          (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr)  ,
1593
          "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
1594
  assert( tj->ptr() != TypePtr::TopPTR &&
1595
          tj->ptr() != TypePtr::AnyNull &&
1596
          tj->ptr() != TypePtr::Null, "No imprecise addresses" );
1597
//    assert( tj->ptr() != TypePtr::Constant ||
1598
//            tj->base() == Type::RawPtr ||
1599
//            tj->base() == Type::KlassPtr, "No constant oop addresses" );
1600

1601
  return tj;
1602
}
1603

1604
void Compile::AliasType::Init(int i, const TypePtr* at) {
1605
  _index = i;
1606
  _adr_type = at;
1607
  _field = NULL;
1608
  _element = NULL;
1609
  _is_rewritable = true; // default
1610
  const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
1611
  if (atoop != NULL && atoop->is_known_instance()) {
1612
    const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
1613
    _general_index = Compile::current()->get_alias_index(gt);
1614
  } else {
1615
    _general_index = 0;
1616
  }
1617
}
1618

1619
// Basic type of the memory described by this alias type:
//  - for an array element alias: T_OBJECT when the element type is a narrow
//    oop, otherwise the element's array_element_basic_type();
//  - for a field alias: the field's layout type;
//  - otherwise T_ILLEGAL (unknown).
BasicType Compile::AliasType::basic_type() const {
  if (element() != NULL) {
    const Type* elem_type = adr_type()->is_aryptr()->elem();
    if (elem_type->isa_narrowoop()) {
      return T_OBJECT;
    }
    return elem_type->array_element_basic_type();
  }
  if (field() != NULL) {
    return field()->layout_type();
  }
  return T_ILLEGAL; // unknown
}
1629

1630
//---------------------------------print_on------------------------------------
1631
#ifndef PRODUCT
1632
// Print a one-line description of this alias type on `st`: the index, an
// " RO" marker when the type is not rewritable, the offset ("+any" for
// OffsetBot) and the address type.  When a field is attached whose holder or
// offset disagrees with the oop address type, the field is printed as well,
// flagged with " ***".
void Compile::AliasType::print_on(outputStream* st) {
  // Single-digit indices get a trailing space so the columns line up.
  if (index() < 10) {
    st->print("@ <%d> ", index());
  } else {
    st->print("@ <%d>",  index());
  }
  st->print(is_rewritable() ? "   " : " RO");

  const int off = adr_type()->offset();
  if (off == Type::OffsetBot) {
    st->print(" +any");
  } else {
    st->print(" +%-3d", off);
  }

  st->print(" in ");
  adr_type()->dump_on(st);

  const TypeOopPtr* oop_type = adr_type()->isa_oopptr();
  const bool field_mismatch =
      field() != NULL && oop_type &&
      (oop_type->klass()  != field()->holder() ||
       oop_type->offset() != field()->offset_in_bytes());
  if (field_mismatch) {
    st->print(" != ");
    field()->print();
    st->print(" ***");
  }
}
1653

1654
void print_alias_types() {
1655
  Compile* C = Compile::current();
1656
  tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
1657
  for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
1658
    C->alias_type(idx)->print_on(tty);
1659
    tty->cr();
1660
  }
1661
}
1662
#endif
1663

1664

1665
//----------------------------probe_alias_cache--------------------------------
1666
// Return the alias cache slot that `adr_type` hashes to.  The pointer value
// is folded with itself shifted right by logAliasCacheSize and then masked
// down to logAliasCacheSize bits.  The caller must compare the slot's
// _adr_type to find out whether it is actually a hit.
Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
  const intptr_t raw    = (intptr_t) adr_type;
  const intptr_t folded = raw ^ (raw >> logAliasCacheSize);
  return &_alias_cache[folded & right_n_bits(logAliasCacheSize)];
}
1671

1672

1673
//-----------------------------grow_alias_types--------------------------------
1674
void Compile::grow_alias_types() {
1675
  const int old_ats  = _max_alias_types; // how many before?
1676
  const int new_ats  = old_ats;          // how many more?
1677
  const int grow_ats = old_ats+new_ats;  // how many now?
1678
  _max_alias_types = grow_ats;
1679
  _alias_types =  REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
1680
  AliasType* ats =    NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
1681
  Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
1682
  for (int i = 0; i < new_ats; i++)  _alias_types[old_ats+i] = &ats[i];
1683
}
1684

1685

1686
//--------------------------------find_alias_type------------------------------
1687
// Map an address type to its alias type.
//
// The alias cache is consulted first; on a miss the address type is flattened
// and looked up linearly in the alias type table.  If no entry exists, a new
// one is created and classified (rewritable or not, array element, field),
// unless `no_create` is set, in which case NULL is returned.
// `original_field` is only used to cross-check the field that is found by
// offset (debug builds).
Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {
  if (_AliasLevel == 0) {
    return alias_type(AliasIdxBot);
  }

  // Fast path: the cache slot already describes this exact address type.
  AliasCacheEntry* entry = probe_alias_cache(adr_type);
  if (entry->_adr_type == adr_type) {
    return alias_type(entry->_index);
  }

  // Handle special cases.
  if (adr_type == NULL) {
    return alias_type(AliasIdxTop);
  }
  if (adr_type == TypePtr::BOTTOM) {
    return alias_type(AliasIdxBot);
  }

  // Do it the slow way.
  const TypePtr* flat = flatten_alias_type(adr_type);

#ifdef ASSERT
  {
    ResourceMark rm;
    assert(flat == flatten_alias_type(flat),
           err_msg("not idempotent: adr_type = %s; flat = %s => %s", Type::str(adr_type),
                   Type::str(flat), Type::str(flatten_alias_type(flat))));
    assert(flat != TypePtr::BOTTOM,
           err_msg("cannot alias-analyze an untyped ptr: adr_type = %s", Type::str(adr_type)));
    if (flat->isa_oopptr() && !flat->isa_klassptr()) {
      const TypeOopPtr* foop = flat->is_oopptr();
      // Scalarizable allocations have exact klass always.
      bool exact = !foop->klass_is_exact() || foop->is_known_instance();
      const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
      assert(foop == flatten_alias_type(xoop),
             err_msg("exactness must not affect alias type: foop = %s; xoop = %s",
                     Type::str(foop), Type::str(xoop)));
    }
  }
#endif

  // Linear search for an existing entry with the same flattened type.
  int idx = AliasIdxTop;
  for (int probe = 0; probe < num_alias_types(); probe++) {
    if (alias_type(probe)->adr_type() == flat) {
      idx = probe;
      break;
    }
  }

  if (idx == AliasIdxTop) {
    if (no_create) {
      return NULL;
    }
    // Grow the array if necessary.
    if (_num_alias_types == _max_alias_types) {
      grow_alias_types();
    }
    // Add a new alias type.
    idx = _num_alias_types++;
    _alias_types[idx]->Init(idx, flat);

    // Klass and array-range slices are treated as read-only.
    if (flat == TypeInstPtr::KLASS || flat == TypeAryPtr::RANGE) {
      alias_type(idx)->set_rewritable(false);
    }
    // So is the klass slot of a java.lang.Class instance.
    if (flat->isa_instptr() &&
        flat->offset() == java_lang_Class::klass_offset_in_bytes() &&
        flat->is_instptr()->klass() == env()->Class_klass()) {
      alias_type(idx)->set_rewritable(false);
    }
    if (flat->isa_aryptr()) {
#ifdef ASSERT
      const int header_size_min  = arrayOopDesc::base_offset_in_bytes(T_BYTE);
      // (T_BYTE has the weakest alignment and size restrictions...)
      assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");
#endif
      if (flat->offset() == TypePtr::OffsetBot) {
        alias_type(idx)->set_element(flat->is_aryptr()->elem());
      }
    }
    if (flat->isa_klassptr()) {
      // These Klass fields are marked not rewritable.
      const int klass_off = flat->offset();
      if (klass_off == in_bytes(Klass::super_check_offset_offset()) ||
          klass_off == in_bytes(Klass::modifier_flags_offset()) ||
          klass_off == in_bytes(Klass::access_flags_offset()) ||
          klass_off == in_bytes(Klass::java_mirror_offset())) {
        alias_type(idx)->set_rewritable(false);
      }
    }
    // %%% (We would like to finalize JavaThread::threadObj_offset(),
    // but the base pointer type is not distinctive enough to identify
    // references into JavaThread.)

    // Check for final fields.
    const TypeInstPtr* inst_type = flat->isa_instptr();
    if (inst_type && inst_type->offset() >= instanceOopDesc::base_offset_in_bytes()) {
      // A constant java.lang.Class oop with an offset at or beyond the
      // instance size refers to a static field of the mirrored class.
      const bool is_static_ref =
          inst_type->const_oop() != NULL &&
          inst_type->klass() == ciEnv::current()->Class_klass() &&
          inst_type->offset() >= (inst_type->klass()->as_instance_klass()->size_helper() * wordSize);

      ciInstanceKlass* holder;
      if (is_static_ref) {
        holder = inst_type->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
      } else {
        holder = inst_type->klass()->as_instance_klass();
      }
      ciField* found_field = holder->get_field_by_offset(inst_type->offset(), is_static_ref);

      assert(found_field == NULL ||
             original_field == NULL ||
             (found_field->holder() == original_field->holder() &&
              found_field->offset() == original_field->offset() &&
              found_field->is_static() == original_field->is_static()), "wrong field?");
      // Set field() and is_rewritable() attributes.
      if (found_field != NULL) {
        alias_type(idx)->set_field(found_field);
      }
    }
  }

  // Fill the cache for next time.
  entry->_adr_type = adr_type;
  entry->_index    = idx;
  assert(alias_type(adr_type) == alias_type(idx),  "type must be installed");

  // Might as well try to fill the cache for the flattened version, too.
  AliasCacheEntry* flat_entry = probe_alias_cache(flat);
  if (flat_entry->_adr_type == NULL) {
    flat_entry->_adr_type = flat;
    flat_entry->_index    = idx;
    assert(alias_type(flat) == alias_type(idx), "flat type must work too");
  }

  return alias_type(idx);
}
1808

1809

1810
// Alias type for a field.  The base pointer type is the holder's java mirror
// for a static field, or the raw type made from the holder klass otherwise;
// the field offset is then added to it.
Compile::AliasType* Compile::alias_type(ciField* field) {
  const TypeOopPtr* base;
  if (field->is_static()) {
    base = TypeInstPtr::make(field->holder()->java_mirror());
  } else {
    base = TypeOopPtr::make_from_klass_raw(field->holder());
  }
  AliasType* result = alias_type(base->add_offset(field->offset_in_bytes()), field);
  assert((field->is_final() || field->is_stable()) == !result->is_rewritable(), "must get the rewritable bits correct");
  return result;
}
1820

1821

1822
//------------------------------have_alias_type--------------------------------
1823
bool Compile::have_alias_type(const TypePtr* adr_type) {
1824
  AliasCacheEntry* ace = probe_alias_cache(adr_type);
1825
  if (ace->_adr_type == adr_type) {
1826
    return true;
1827
  }
1828

1829
  // Handle special cases.
1830
  if (adr_type == NULL)             return true;
1831
  if (adr_type == TypePtr::BOTTOM)  return true;
1832

1833
  return find_alias_type(adr_type, true, NULL) != NULL;
1834
}
1835

1836
//-----------------------------must_alias--------------------------------------
1837
// True if all values of the given address type are in the given alias category.
1838
bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
1839
  if (alias_idx == AliasIdxBot)         return true;  // the universal category
1840
  if (adr_type == NULL)                 return true;  // NULL serves as TypePtr::TOP
1841
  if (alias_idx == AliasIdxTop)         return false; // the empty category
1842
  if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
1843

1844
  // the only remaining possible overlap is identity
1845
  int adr_idx = get_alias_index(adr_type);
1846
  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1847
  assert(adr_idx == alias_idx ||
1848
         (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
1849
          && adr_type                       != TypeOopPtr::BOTTOM),
1850
         "should not be testing for overlap with an unsafe pointer");
1851
  return adr_idx == alias_idx;
1852
}
1853

1854
//------------------------------can_alias--------------------------------------
1855
// True if any values of the given address type are in the given alias category.
1856
bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
1857
  if (alias_idx == AliasIdxTop)         return false; // the empty category
1858
  if (adr_type == NULL)                 return false; // NULL serves as TypePtr::TOP
1859
  if (alias_idx == AliasIdxBot)         return true;  // the universal category
1860
  if (adr_type->base() == Type::AnyPtr) return true;  // TypePtr::BOTTOM or its twins
1861

1862
  // the only remaining possible overlap is identity
1863
  int adr_idx = get_alias_index(adr_type);
1864
  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1865
  return adr_idx == alias_idx;
1866
}
1867

1868

1869

1870
//---------------------------pop_warm_call-------------------------------------
1871
// Detach and return the head of the warm call list, or NULL when the list
// is empty.
WarmCallInfo* Compile::pop_warm_call() {
  WarmCallInfo* head = _warm_calls;
  if (head == NULL) {
    return NULL;
  }
  _warm_calls = head->remove_from(head);
  return head;
}
1876

1877
//----------------------------Inline_Warm--------------------------------------
1878
int Compile::Inline_Warm() {
1879
  // If there is room, try to inline some more warm call sites.
1880
  // %%% Do a graph index compaction pass when we think we're out of space?
1881
  if (!InlineWarmCalls)  return 0;
1882

1883
  int calls_made_hot = 0;
1884
  int room_to_grow   = NodeCountInliningCutoff - unique();
1885
  int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
1886
  int amount_grown   = 0;
1887
  WarmCallInfo* call;
1888
  while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
1889
    int est_size = (int)call->size();
1890
    if (est_size > (room_to_grow - amount_grown)) {
1891
      // This one won't fit anyway.  Get rid of it.
1892
      call->make_cold();
1893
      continue;
1894
    }
1895
    call->make_hot();
1896
    calls_made_hot++;
1897
    amount_grown   += est_size;
1898
    amount_to_grow -= est_size;
1899
  }
1900

1901
  if (calls_made_hot > 0)  set_major_progress();
1902
  return calls_made_hot;
1903
}
1904

1905

1906
//----------------------------Finish_Warm--------------------------------------
1907
void Compile::Finish_Warm() {
1908
  if (!InlineWarmCalls)  return;
1909
  if (failing())  return;
1910
  if (warm_calls() == NULL)  return;
1911

1912
  // Clean up loose ends, if we are out of space for inlining.
1913
  WarmCallInfo* call;
1914
  while ((call = pop_warm_call()) != NULL) {
1915
    call->make_cold();
1916
  }
1917
}
1918

1919
//---------------------cleanup_loop_predicates-----------------------
1920
// Remove the opaque nodes that protect the predicates so that all unused
1921
// checks and uncommon_traps will be eliminated from the ideal graph
1922
void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
1923
  if (predicate_count()==0) return;
1924
  for (int i = predicate_count(); i > 0; i--) {
1925
    Node * n = predicate_opaque1_node(i-1);
1926
    assert(n->Opcode() == Op_Opaque1, "must be");
1927
    igvn.replace_node(n, n->in(1));
1928
  }
1929
  assert(predicate_count()==0, "should be clean!");
1930
}
1931

1932
// Record `n` in the list of range check dependent CastII nodes.
// `n` must be a CastII carrying a range check dependency and must not
// already be in the list (both checked in debug builds).
void Compile::add_range_check_cast(Node* n) {
  // isa_CastII() is tested for NULL before use so that a node of the wrong
  // kind trips the assertion instead of dereferencing a NULL pointer.
  assert(n->isa_CastII() != NULL && n->isa_CastII()->has_range_check(), "CastII should have range check dependency");
  assert(!_range_check_casts->contains(n), "duplicate entry in range check casts");
  _range_check_casts->append(n);
}
1937

1938
// Remove all range check dependent CastIINodes.
1939
void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {
1940
  for (int i = range_check_cast_count(); i > 0; i--) {
1941
    Node* cast = range_check_cast_node(i-1);
1942
    assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");
1943
    igvn.replace_node(cast, cast->in(1));
1944
  }
1945
  assert(range_check_cast_count() == 0, "should be empty");
1946
}
1947

1948
// StringOpts and late inlining of string methods
1949
void Compile::inline_string_calls(bool parse_time) {
1950
  {
1951
    // remove useless nodes to make the usage analysis simpler
1952
    ResourceMark rm;
1953
    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
1954
  }
1955

1956
  {
1957
    ResourceMark rm;
1958
    print_method(PHASE_BEFORE_STRINGOPTS, 3);
1959
    PhaseStringOpts pso(initial_gvn(), for_igvn());
1960
    print_method(PHASE_AFTER_STRINGOPTS, 3);
1961
  }
1962

1963
  // now inline anything that we skipped the first time around
1964
  if (!parse_time) {
1965
    _late_inlines_pos = _late_inlines.length();
1966
  }
1967

1968
  while (_string_late_inlines.length() > 0) {
1969
    CallGenerator* cg = _string_late_inlines.pop();
1970
    cg->do_late_inline();
1971
    if (failing())  return;
1972
  }
1973
  _string_late_inlines.trunc_to(0);
1974
}
1975

1976
// Late inlining of boxing methods
1977
void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
1978
  if (_boxing_late_inlines.length() > 0) {
1979
    assert(has_boxed_value(), "inconsistent");
1980

1981
    PhaseGVN* gvn = initial_gvn();
1982
    set_inlining_incrementally(true);
1983

1984
    assert( igvn._worklist.size() == 0, "should be done with igvn" );
1985
    for_igvn()->clear();
1986
    gvn->replace_with(&igvn);
1987

1988
    _late_inlines_pos = _late_inlines.length();
1989

1990
    while (_boxing_late_inlines.length() > 0) {
1991
      CallGenerator* cg = _boxing_late_inlines.pop();
1992
      cg->do_late_inline();
1993
      if (failing())  return;
1994
    }
1995
    _boxing_late_inlines.trunc_to(0);
1996

1997
    {
1998
      ResourceMark rm;
1999
      PhaseRemoveUseless pru(gvn, for_igvn());
2000
    }
2001

2002
    igvn = PhaseIterGVN(gvn);
2003
    igvn.optimize();
2004

2005
    set_inlining_progress(false);
2006
    set_inlining_incrementally(false);
2007
  }
2008
}
2009

2010
// One round of incremental inlining: process queued late inlines in order
// until one of them reports inlining progress (or the queue is exhausted),
// drop the processed entries from the front of the queue, remove useless
// nodes and rebuild `igvn` from the parse-time GVN.  Returns early, without
// compacting the queue, if the compilation starts failing.
void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
  assert(IncrementalInline, "incremental inlining should be on");
  PhaseGVN* gvn = initial_gvn();

  set_inlining_progress(false);
  for_igvn()->clear();
  gvn->replace_with(&igvn);

  // Process candidates until progress is made.
  int next = 0;
  while (next <_late_inlines.length() && !inlining_progress()) {
    CallGenerator* generator = _late_inlines.at(next);
    _late_inlines_pos = next+1;
    generator->do_late_inline();
    if (failing()) {
      return;
    }
    next++;
  }

  // Shift the unprocessed candidates down to the front of the list.
  int kept = 0;
  while (next < _late_inlines.length()) {
    _late_inlines.at_put(kept, _late_inlines.at(next));
    next++;
    kept++;
  }
  _late_inlines.trunc_to(kept);

  {
    ResourceMark rm;
    PhaseRemoveUseless remove_useless(gvn, for_igvn());
  }

  igvn = PhaseIterGVN(gvn);
}
2039

2040
// Perform incremental inlining until bound on number of live nodes is reached
2041
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
2042
  PhaseGVN* gvn = initial_gvn();
2043

2044
  set_inlining_incrementally(true);
2045
  set_inlining_progress(true);
2046
  uint low_live_nodes = 0;
2047

2048
  while(inlining_progress() && _late_inlines.length() > 0) {
2049

2050
    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
2051
      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
2052
        // PhaseIdealLoop is expensive so we only try it once we are
2053
        // out of live nodes and we only try it again if the previous
2054
        // helped got the number of nodes down significantly
2055
        PhaseIdealLoop ideal_loop( igvn, false, true );
2056
        if (failing())  return;
2057
        low_live_nodes = live_nodes();
2058
        _major_progress = true;
2059
      }
2060

2061
      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
2062
        break;
2063
      }
2064
    }
2065

2066
    inline_incrementally_one(igvn);
2067

2068
    if (failing())  return;
2069

2070
    igvn.optimize();
2071

2072
    if (failing())  return;
2073
  }
2074

2075
  assert( igvn._worklist.size() == 0, "should be done with igvn" );
2076

2077
  if (_string_late_inlines.length() > 0) {
2078
    assert(has_stringbuilder(), "inconsistent");
2079
    for_igvn()->clear();
2080
    initial_gvn()->replace_with(&igvn);
2081

2082
    inline_string_calls(false);
2083

2084
    if (failing())  return;
2085

2086
    {
2087
      ResourceMark rm;
2088
      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
2089
    }
2090

2091
    igvn = PhaseIterGVN(gvn);
2092

2093
    igvn.optimize();
2094
  }
2095

2096
  set_inlining_incrementally(false);
2097
}
2098

2099

2100
// Remove edges from "root" to each SafePoint at a backward branch.
2101
// They were inserted during parsing (see add_safepoint()) to make
2102
// infinite loops without calls or exceptions visible to root, i.e.,
2103
// useful.
2104
void Compile::remove_root_to_sfpts_edges(PhaseIterGVN& igvn) {
2105
  Node *r = root();
2106
  if (r != NULL) {
2107
    for (uint i = r->req(); i < r->len(); ++i) {
2108
      Node *n = r->in(i);
2109
      if (n != NULL && n->is_SafePoint()) {
2110
        r->rm_prec(i);
2111
        if (n->outcnt() == 0) {
2112
          igvn.remove_dead_node(n);
2113
        }
2114
        --i;
2115
      }
2116
    }
2117
  }
2118
}
2119

2120
//------------------------------Optimize---------------------------------------
2121
// Given a graph, optimize it.
2122
void Compile::Optimize() {
2123
  TracePhase t1("optimizer", &_t_optimizer, true);
2124

2125
#ifndef PRODUCT
2126
  if (env()->break_at_compile()) {
2127
    BREAKPOINT;
2128
  }
2129

2130
#endif
2131

2132
  ResourceMark rm;
2133
  int          loop_opts_cnt;
2134

2135
  NOT_PRODUCT( verify_graph_edges(); )
2136

2137
  print_method(PHASE_AFTER_PARSING);
2138

2139
 {
2140
  // Iterative Global Value Numbering, including ideal transforms
2141
  // Initialize IterGVN with types and values from parse-time GVN
2142
  PhaseIterGVN igvn(initial_gvn());
2143
  {
2144
    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
2145
    igvn.optimize();
2146
  }
2147

2148
  print_method(PHASE_ITER_GVN1, 2);
2149

2150
  if (failing())  return;
2151

2152
  {
2153
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
2154
    inline_incrementally(igvn);
2155
  }
2156

2157
  print_method(PHASE_INCREMENTAL_INLINE, 2);
2158

2159
  if (failing())  return;
2160

2161
  if (eliminate_boxing()) {
2162
    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
2163
    // Inline valueOf() methods now.
2164
    inline_boxing_calls(igvn);
2165

2166
    if (AlwaysIncrementalInline) {
2167
      inline_incrementally(igvn);
2168
    }
2169

2170
    print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);
2171

2172
    if (failing())  return;
2173
  }
2174

2175
  // Now that all inlining is over, cut edge from root to loop
2176
  // safepoints
2177
  remove_root_to_sfpts_edges(igvn);
2178

2179
  // Remove the speculative part of types and clean up the graph from
2180
  // the extra CastPP nodes whose only purpose is to carry them. Do
2181
  // that early so that optimizations are not disrupted by the extra
2182
  // CastPP nodes.
2183
  remove_speculative_types(igvn);
2184

2185
  // No more new expensive nodes will be added to the list from here
2186
  // so keep only the actual candidates for optimizations.
2187
  cleanup_expensive_nodes(igvn);
2188

2189
  if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
2190
    NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)
2191
    initial_gvn()->replace_with(&igvn);
2192
    for_igvn()->clear();
2193
    Unique_Node_List new_worklist(C->comp_arena());
2194
    {
2195
      ResourceMark rm;
2196
      PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);
2197
    }
2198
    set_for_igvn(&new_worklist);
2199
    igvn = PhaseIterGVN(initial_gvn());
2200
    igvn.optimize();
2201
  }
2202

2203
  // Perform escape analysis
2204
  if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
2205
    if (has_loops()) {
2206
      // Cleanup graph (remove dead nodes).
2207
      TracePhase t2("idealLoop", &_t_idealLoop, true);
2208
      PhaseIdealLoop ideal_loop( igvn, false, true );
2209
      if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
2210
      if (failing())  return;
2211
    }
2212
    ConnectionGraph::do_analysis(this, &igvn);
2213

2214
    if (failing())  return;
2215

2216
    // Optimize out fields loads from scalar replaceable allocations.
2217
    igvn.optimize();
2218
    print_method(PHASE_ITER_GVN_AFTER_EA, 2);
2219

2220
    if (failing())  return;
2221

2222
    if (congraph() != NULL && macro_count() > 0) {
2223
      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
2224
      PhaseMacroExpand mexp(igvn);
2225
      mexp.eliminate_macro_nodes();
2226
      igvn.set_delay_transform(false);
2227

2228
      igvn.optimize();
2229
      print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);
2230

2231
      if (failing())  return;
2232
    }
2233
  }
2234

2235
  // Loop transforms on the ideal graph.  Range Check Elimination,
2236
  // peeling, unrolling, etc.
2237

2238
  // Set loop opts counter
2239
  loop_opts_cnt = num_loop_opts();
2240
  if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
2241
    {
2242
      TracePhase t2("idealLoop", &_t_idealLoop, true);
2243
      PhaseIdealLoop ideal_loop( igvn, true );
2244
      loop_opts_cnt--;
2245
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
2246
      if (failing())  return;
2247
    }
2248
    // Loop opts pass if partial peeling occurred in previous pass
2249
    if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
2250
      TracePhase t3("idealLoop", &_t_idealLoop, true);
2251
      PhaseIdealLoop ideal_loop( igvn, false );
2252
      loop_opts_cnt--;
2253
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
2254
      if (failing())  return;
2255
    }
2256
    // Loop opts pass for loop-unrolling before CCP
2257
    if(major_progress() && (loop_opts_cnt > 0)) {
2258
      TracePhase t4("idealLoop", &_t_idealLoop, true);
2259
      PhaseIdealLoop ideal_loop( igvn, false );
2260
      loop_opts_cnt--;
2261
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
2262
    }
2263
    if (!failing()) {
2264
      // Verify that last round of loop opts produced a valid graph
2265
      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
2266
      PhaseIdealLoop::verify(igvn);
2267
    }
2268
  }
2269
  if (failing())  return;
2270

2271
  // Conditional Constant Propagation;
2272
  PhaseCCP ccp( &igvn );
2273
  assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
2274
  {
2275
    TracePhase t2("ccp", &_t_ccp, true);
2276
    ccp.do_transform();
2277
  }
2278
  print_method(PHASE_CPP1, 2);
2279

2280
  assert( true, "Break here to ccp.dump_old2new_map()");
2281

2282
  // Iterative Global Value Numbering, including ideal transforms
2283
  {
2284
    NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
2285
    igvn = ccp;
2286
    igvn.optimize();
2287
  }
2288

2289
  print_method(PHASE_ITER_GVN2, 2);
2290

2291
  if (failing())  return;
2292

2293
  // Loop transforms on the ideal graph.  Range Check Elimination,
2294
  // peeling, unrolling, etc.
2295
  if(loop_opts_cnt > 0) {
2296
    debug_only( int cnt = 0; );
2297
    while(major_progress() && (loop_opts_cnt > 0)) {
2298
      TracePhase t2("idealLoop", &_t_idealLoop, true);
2299
      assert( cnt++ < 40, "infinite cycle in loop optimization" );
2300
      PhaseIdealLoop ideal_loop( igvn, true);
2301
      loop_opts_cnt--;
2302
      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
2303
      if (failing())  return;
2304
    }
2305
  }
2306

2307
  {
2308
    // Verify that all previous optimizations produced a valid graph
2309
    // at least to this point, even if no loop optimizations were done.
2310
    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
2311
    PhaseIdealLoop::verify(igvn);
2312
  }
2313

2314
  if (range_check_cast_count() > 0) {
2315
    // No more loop optimizations. Remove all range check dependent CastIINodes.
2316
    C->remove_range_check_casts(igvn);
2317
    igvn.optimize();
2318
  }
2319

2320
#ifdef ASSERT
2321
  if (UseShenandoahGC && ShenandoahVerifyOptoBarriers) {
2322
    ShenandoahBarrierC2Support::verify(C->root());
2323
  }
2324
#endif
2325

2326
  {
2327
    NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
2328
    PhaseMacroExpand  mex(igvn);
2329
    if (mex.expand_macro_nodes()) {
2330
      assert(failing(), "must bail out w/ explicit message");
2331
      return;
2332
    }
2333
  }
2334

2335
#if INCLUDE_ALL_GCS
2336
  if (UseShenandoahGC) {
2337
    ShenandoahBarrierC2Support::expand(this, igvn);
2338
  }
2339
#endif
2340

2341
 } // (End scope of igvn; run destructor if necessary for asserts.)
2342

2343
  dump_inlining();
2344
  // A method with only infinite loops has no edges entering loops from root
2345
  {
2346
    NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
2347
    if (final_graph_reshaping()) {
2348
      assert(failing(), "must bail out w/ explicit message");
2349
      return;
2350
    }
2351
  }
2352

2353
  print_method(PHASE_OPTIMIZE_FINISHED, 2);
2354
}
2355

2356

2357
//------------------------------Code_Gen---------------------------------------
2358
// Given a graph, generate code for it
2359
void Compile::Code_Gen() {
2360
  if (failing()) {
2361
    return;
2362
  }
2363

2364
  // Perform instruction selection.  You might think we could reclaim Matcher
2365
  // memory PDQ, but actually the Matcher is used in generating spill code.
2366
  // Internals of the Matcher (including some VectorSets) must remain live
2367
  // for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage
2368
  // set a bit in reclaimed memory.
2369

2370
  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
2371
  // nodes.  Mapping is only valid at the root of each matched subtree.
2372
  NOT_PRODUCT( verify_graph_edges(); )
2373

2374
  Matcher matcher;
2375
  _matcher = &matcher;
2376
  {
2377
    TracePhase t2("matcher", &_t_matcher, true);
2378
    matcher.match();
2379
  }
2380
  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
2381
  // nodes.  Mapping is only valid at the root of each matched subtree.
2382
  NOT_PRODUCT( verify_graph_edges(); )
2383

2384
  // If you have too many nodes, or if matching has failed, bail out
2385
  check_node_count(0, "out of nodes matching instructions");
2386
  if (failing()) {
2387
    return;
2388
  }
2389

2390
  // Build a proper-looking CFG
2391
  PhaseCFG cfg(node_arena(), root(), matcher);
2392
  _cfg = &cfg;
2393
  {
2394
    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
2395
    bool success = cfg.do_global_code_motion();
2396
    if (!success) {
2397
      return;
2398
    }
2399

2400
    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
2401
    NOT_PRODUCT( verify_graph_edges(); )
2402
    debug_only( cfg.verify(); )
2403
  }
2404

2405
  PhaseChaitin regalloc(unique(), cfg, matcher);
2406
  _regalloc = &regalloc;
2407
  {
2408
    TracePhase t2("regalloc", &_t_registerAllocation, true);
2409
    // Perform register allocation.  After Chaitin, use-def chains are
2410
    // no longer accurate (at spill code) and so must be ignored.
2411
    // Node->LRG->reg mappings are still accurate.
2412
    _regalloc->Register_Allocate();
2413

2414
    // Bail out if the allocator builds too many nodes
2415
    if (failing()) {
2416
      return;
2417
    }
2418
  }
2419

2420
  // Prior to register allocation we kept empty basic blocks in case the
2421
  // the allocator needed a place to spill.  After register allocation we
2422
  // are not adding any new instructions.  If any basic block is empty, we
2423
  // can now safely remove it.
2424
  {
2425
    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
2426
    cfg.remove_empty_blocks();
2427
    if (do_freq_based_layout()) {
2428
      PhaseBlockLayout layout(cfg);
2429
    } else {
2430
      cfg.set_loop_alignment();
2431
    }
2432
    cfg.fixup_flow();
2433
  }
2434

2435
  // Apply peephole optimizations
2436
  if( OptoPeephole ) {
2437
    NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
2438
    PhasePeephole peep( _regalloc, cfg);
2439
    peep.do_transform();
2440
  }
2441

2442
  // Do late expand if CPU requires this.
2443
  if (Matcher::require_postalloc_expand) {
2444
    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
2445
    cfg.postalloc_expand(_regalloc);
2446
  }
2447

2448
  // Convert Nodes to instruction bits in a buffer
2449
  {
2450
    // %%%% workspace merge brought two timers together for one job
2451
    TracePhase t2a("output", &_t_output, true);
2452
    NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
2453
    Output();
2454
  }
2455

2456
  print_method(PHASE_FINAL_CODE);
2457

2458
  // He's dead, Jim.
2459
  _cfg     = (PhaseCFG*)((intptr_t)0xdeadbeef);
2460
  _regalloc = (PhaseChaitin*)((intptr_t)0xdeadbeef);
2461
}
2462

2463

2464
//------------------------------dump_asm---------------------------------------
2465
// Dump formatted assembly
2466
#ifndef PRODUCT
2467
void Compile::dump_asm(int *pcs, uint pc_limit) {
2468
  bool cut_short = false;
2469
  tty->print_cr("#");
2470
  tty->print("#  ");  _tf->dump();  tty->cr();
2471
  tty->print_cr("#");
2472

2473
  // For all blocks
2474
  int pc = 0x0;                 // Program counter
2475
  char starts_bundle = ' ';
2476
  _regalloc->dump_frame();
2477

2478
  Node *n = NULL;
2479
  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
2480
    if (VMThread::should_terminate()) {
2481
      cut_short = true;
2482
      break;
2483
    }
2484
    Block* block = _cfg->get_block(i);
2485
    if (block->is_connector() && !Verbose) {
2486
      continue;
2487
    }
2488
    n = block->head();
2489
    if (pcs && n->_idx < pc_limit) {
2490
      tty->print("%3.3x   ", pcs[n->_idx]);
2491
    } else {
2492
      tty->print("      ");
2493
    }
2494
    block->dump_head(_cfg);
2495
    if (block->is_connector()) {
2496
      tty->print_cr("        # Empty connector block");
2497
    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
2498
      tty->print_cr("        # Block is sole successor of call");
2499
    }
2500

2501
    // For all instructions
2502
    Node *delay = NULL;
2503
    for (uint j = 0; j < block->number_of_nodes(); j++) {
2504
      if (VMThread::should_terminate()) {
2505
        cut_short = true;
2506
        break;
2507
      }
2508
      n = block->get_node(j);
2509
      if (valid_bundle_info(n)) {
2510
        Bundle* bundle = node_bundling(n);
2511
        if (bundle->used_in_unconditional_delay()) {
2512
          delay = n;
2513
          continue;
2514
        }
2515
        if (bundle->starts_bundle()) {
2516
          starts_bundle = '+';
2517
        }
2518
      }
2519

2520
      if (WizardMode) {
2521
        n->dump();
2522
      }
2523

2524
      if( !n->is_Region() &&    // Dont print in the Assembly
2525
          !n->is_Phi() &&       // a few noisely useless nodes
2526
          !n->is_Proj() &&
2527
          !n->is_MachTemp() &&
2528
          !n->is_SafePointScalarObject() &&
2529
          !n->is_Catch() &&     // Would be nice to print exception table targets
2530
          !n->is_MergeMem() &&  // Not very interesting
2531
          !n->is_top() &&       // Debug info table constants
2532
          !(n->is_Con() && !n->is_Mach())// Debug info table constants
2533
          ) {
2534
        if (pcs && n->_idx < pc_limit)
2535
          tty->print("%3.3x", pcs[n->_idx]);
2536
        else
2537
          tty->print("   ");
2538
        tty->print(" %c ", starts_bundle);
2539
        starts_bundle = ' ';
2540
        tty->print("\t");
2541
        n->format(_regalloc, tty);
2542
        tty->cr();
2543
      }
2544

2545
      // If we have an instruction with a delay slot, and have seen a delay,
2546
      // then back up and print it
2547
      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
2548
        assert(delay != NULL, "no unconditional delay instruction");
2549
        if (WizardMode) delay->dump();
2550

2551
        if (node_bundling(delay)->starts_bundle())
2552
          starts_bundle = '+';
2553
        if (pcs && n->_idx < pc_limit)
2554
          tty->print("%3.3x", pcs[n->_idx]);
2555
        else
2556
          tty->print("   ");
2557
        tty->print(" %c ", starts_bundle);
2558
        starts_bundle = ' ';
2559
        tty->print("\t");
2560
        delay->format(_regalloc, tty);
2561
        tty->cr();
2562
        delay = NULL;
2563
      }
2564

2565
      // Dump the exception table as well
2566
      if( n->is_Catch() && (Verbose || WizardMode) ) {
2567
        // Print the exception table for this offset
2568
        _handler_table.print_subtable_for(pc);
2569
      }
2570
    }
2571

2572
    if (pcs && n->_idx < pc_limit)
2573
      tty->print_cr("%3.3x", pcs[n->_idx]);
2574
    else
2575
      tty->cr();
2576

2577
    assert(cut_short || delay == NULL, "no unconditional delay branch");
2578

2579
  } // End of per-block dump
2580
  tty->cr();
2581

2582
  if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
2583
}
2584
#endif
2585

2586
//------------------------------Final_Reshape_Counts---------------------------
2587
// This class defines counters to help identify when a method
2588
// may/must be executed using hardware with only 24-bit precision.
2589
struct Final_Reshape_Counts : public StackObj {
2590
  int  _call_count;             // count non-inlined 'common' calls
2591
  int  _float_count;            // count float ops requiring 24-bit precision
2592
  int  _double_count;           // count double ops requiring more precision
2593
  int  _java_call_count;        // count non-inlined 'java' calls
2594
  int  _inner_loop_count;       // count loops which need alignment
2595
  VectorSet _visited;           // Visitation flags
2596
  Node_List _tests;             // Set of IfNodes & PCTableNodes
2597

2598
  Final_Reshape_Counts() :
2599
    _call_count(0), _float_count(0), _double_count(0),
2600
    _java_call_count(0), _inner_loop_count(0),
2601
    _visited( Thread::current()->resource_area() ) { }
2602

2603
  void inc_call_count  () { _call_count  ++; }
2604
  void inc_float_count () { _float_count ++; }
2605
  void inc_double_count() { _double_count++; }
2606
  void inc_java_call_count() { _java_call_count++; }
2607
  void inc_inner_loop_count() { _inner_loop_count++; }
2608

2609
  int  get_call_count  () const { return _call_count  ; }
2610
  int  get_float_count () const { return _float_count ; }
2611
  int  get_double_count() const { return _double_count; }
2612
  int  get_java_call_count() const { return _java_call_count; }
2613
  int  get_inner_loop_count() const { return _inner_loop_count; }
2614
};
2615

2616
#ifdef ASSERT
// Debug-only helper: true when the offset carried by the instance pointer
// type 'tp' falls inside the field layout of tp's instance klass.
// Note that OffsetBot and OffsetTop are very negative values.
static bool oop_offset_is_sane(const TypeInstPtr* tp) {
  return tp->klass()->as_instance_klass()->contains_field_offset(tp->offset());
}
#endif
2624

2625
// Eliminate trivially redundant StoreCMs and accumulate their
2626
// precedence edges.
2627
void Compile::eliminate_redundant_card_marks(Node* n) {
2628
  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
2629
  if (n->in(MemNode::Address)->outcnt() > 1) {
2630
    // There are multiple users of the same address so it might be
2631
    // possible to eliminate some of the StoreCMs
2632
    Node* mem = n->in(MemNode::Memory);
2633
    Node* adr = n->in(MemNode::Address);
2634
    Node* val = n->in(MemNode::ValueIn);
2635
    Node* prev = n;
2636
    bool done = false;
2637
    // Walk the chain of StoreCMs eliminating ones that match.  As
2638
    // long as it's a chain of single users then the optimization is
2639
    // safe.  Eliminating partially redundant StoreCMs would require
2640
    // cloning copies down the other paths.
2641
    while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
2642
      if (adr == mem->in(MemNode::Address) &&
2643
          val == mem->in(MemNode::ValueIn)) {
2644
        // redundant StoreCM
2645
        if (mem->req() > MemNode::OopStore) {
2646
          // Hasn't been processed by this code yet.
2647
          n->add_prec(mem->in(MemNode::OopStore));
2648
        } else {
2649
          // Already converted to precedence edge
2650
          for (uint i = mem->req(); i < mem->len(); i++) {
2651
            // Accumulate any precedence edges
2652
            if (mem->in(i) != NULL) {
2653
              n->add_prec(mem->in(i));
2654
            }
2655
          }
2656
          // Everything above this point has been processed.
2657
          done = true;
2658
        }
2659
        // Eliminate the previous StoreCM
2660
        prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
2661
        assert(mem->outcnt() == 0, "should be dead");
2662
        mem->disconnect_inputs(NULL, this);
2663
      } else {
2664
        prev = mem;
2665
      }
2666
      mem = prev->in(MemNode::Memory);
2667
    }
2668
  }
2669
}
2670

2671
//------------------------------final_graph_reshaping_impl----------------------
2672
// Implement items 1-5 from final_graph_reshaping below.
2673
void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
2674

2675
  if ( n->outcnt() == 0 ) return; // dead node
2676
  uint nop = n->Opcode();
2677

2678
  // Check for 2-input instruction with "last use" on right input.
2679
  // Swap to left input.  Implements item (2).
2680
  if( n->req() == 3 &&          // two-input instruction
2681
      n->in(1)->outcnt() > 1 && // left use is NOT a last use
2682
      (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
2683
      n->in(2)->outcnt() == 1 &&// right use IS a last use
2684
      !n->in(2)->is_Con() ) {   // right use is not a constant
2685
    // Check for commutative opcode
2686
    switch( nop ) {
2687
    case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
2688
    case Op_MaxI:  case Op_MinI:
2689
    case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
2690
    case Op_AndL:  case Op_XorL:  case Op_OrL:
2691
    case Op_AndI:  case Op_XorI:  case Op_OrI: {
2692
      // Move "last use" input to left by swapping inputs
2693
      n->swap_edges(1, 2);
2694
      break;
2695
    }
2696
    default:
2697
      break;
2698
    }
2699
  }
2700

2701
#ifdef ASSERT
2702
  if( n->is_Mem() ) {
2703
    int alias_idx = get_alias_index(n->as_Mem()->adr_type());
2704
    assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
2705
            // oop will be recorded in oop map if load crosses safepoint
2706
            n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
2707
                             LoadNode::is_immutable_value(n->in(MemNode::Address))),
2708
            "raw memory operations should have control edge");
2709
  }
2710
  if (n->is_MemBar()) {
2711
    MemBarNode* mb = n->as_MemBar();
2712
    if (mb->trailing_store() || mb->trailing_load_store()) {
2713
      assert(mb->leading_membar()->trailing_membar() == mb, "bad membar pair");
2714
      Node* mem = mb->in(MemBarNode::Precedent);
2715
      assert((mb->trailing_store() && mem->is_Store() && mem->as_Store()->is_release()) ||
2716
             (mb->trailing_load_store() && mem->is_LoadStore()), "missing mem op");
2717
    } else if (mb->leading()) {
2718
      assert(mb->trailing_membar()->leading_membar() == mb, "bad membar pair");
2719
    }
2720
  }
2721
#endif
2722
  // Count FPU ops and common calls, implements item (3)
2723
  switch( nop ) {
2724
  // Count all float operations that may use FPU
2725
  case Op_AddF:
2726
  case Op_SubF:
2727
  case Op_MulF:
2728
  case Op_DivF:
2729
  case Op_NegF:
2730
  case Op_ModF:
2731
  case Op_ConvI2F:
2732
  case Op_ConF:
2733
  case Op_CmpF:
2734
  case Op_CmpF3:
2735
  // case Op_ConvL2F: // longs are split into 32-bit halves
2736
    frc.inc_float_count();
2737
    break;
2738

2739
  case Op_ConvF2D:
2740
  case Op_ConvD2F:
2741
    frc.inc_float_count();
2742
    frc.inc_double_count();
2743
    break;
2744

2745
  // Count all double operations that may use FPU
2746
  case Op_AddD:
2747
  case Op_SubD:
2748
  case Op_MulD:
2749
  case Op_DivD:
2750
  case Op_NegD:
2751
  case Op_ModD:
2752
  case Op_ConvI2D:
2753
  case Op_ConvD2I:
2754
  // case Op_ConvL2D: // handled by leaf call
2755
  // case Op_ConvD2L: // handled by leaf call
2756
  case Op_ConD:
2757
  case Op_CmpD:
2758
  case Op_CmpD3:
2759
    frc.inc_double_count();
2760
    break;
2761
  case Op_Opaque1:              // Remove Opaque Nodes before matching
2762
  case Op_Opaque2:              // Remove Opaque Nodes before matching
2763
  case Op_Opaque3:
2764
    n->subsume_by(n->in(1), this);
2765
    break;
2766
  case Op_CallStaticJava:
2767
  case Op_CallJava:
2768
  case Op_CallDynamicJava:
2769
    frc.inc_java_call_count(); // Count java call site;
2770
  case Op_CallRuntime:
2771
  case Op_CallLeaf:
2772
  case Op_CallLeafNoFP: {
2773
    assert( n->is_Call(), "" );
2774
    CallNode *call = n->as_Call();
2775
    if (UseShenandoahGC && call->is_g1_wb_pre_call()) {
2776
      uint cnt = OptoRuntime::g1_wb_pre_Type()->domain()->cnt();
2777
      if (call->req() > cnt) {
2778
        assert(call->req() == cnt+1, "only one extra input");
2779
        Node* addp = call->in(cnt);
2780
        assert(!CallLeafNode::has_only_g1_wb_pre_uses(addp), "useless address computation?");
2781
        call->del_req(cnt);
2782
      }
2783
    }
2784
    // Count call sites where the FP mode bit would have to be flipped.
2785
    // Do not count uncommon runtime calls:
2786
    // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
2787
    // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
2788
    if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
2789
      frc.inc_call_count();   // Count the call site
2790
    } else {                  // See if uncommon argument is shared
2791
      Node *n = call->in(TypeFunc::Parms);
2792
      int nop = n->Opcode();
2793
      // Clone shared simple arguments to uncommon calls, item (1).
2794
      if( n->outcnt() > 1 &&
2795
          !n->is_Proj() &&
2796
          nop != Op_CreateEx &&
2797
          nop != Op_CheckCastPP &&
2798
          nop != Op_DecodeN &&
2799
          nop != Op_DecodeNKlass &&
2800
          !n->is_Mem() ) {
2801
        Node *x = n->clone();
2802
        call->set_req( TypeFunc::Parms, x );
2803
      }
2804
    }
2805
    break;
2806
  }
2807

2808
  case Op_StoreD:
2809
  case Op_LoadD:
2810
  case Op_LoadD_unaligned:
2811
    frc.inc_double_count();
2812
    goto handle_mem;
2813
  case Op_StoreF:
2814
  case Op_LoadF:
2815
    frc.inc_float_count();
2816
    goto handle_mem;
2817

2818
  case Op_StoreCM:
2819
    {
2820
      // Convert OopStore dependence into precedence edge
2821
      Node* prec = n->in(MemNode::OopStore);
2822
      n->del_req(MemNode::OopStore);
2823
      n->add_prec(prec);
2824
      eliminate_redundant_card_marks(n);
2825
    }
2826

2827
    // fall through
2828

2829
  case Op_StoreB:
2830
  case Op_StoreC:
2831
  case Op_StorePConditional:
2832
  case Op_StoreI:
2833
  case Op_StoreL:
2834
  case Op_StoreIConditional:
2835
  case Op_StoreLConditional:
2836
  case Op_CompareAndSwapI:
2837
  case Op_CompareAndSwapL:
2838
  case Op_CompareAndSwapP:
2839
  case Op_CompareAndSwapN:
2840
  case Op_GetAndAddI:
2841
  case Op_GetAndAddL:
2842
  case Op_GetAndSetI:
2843
  case Op_GetAndSetL:
2844
  case Op_GetAndSetP:
2845
  case Op_GetAndSetN:
2846
  case Op_StoreP:
2847
  case Op_StoreN:
2848
  case Op_StoreNKlass:
2849
  case Op_LoadB:
2850
  case Op_LoadUB:
2851
  case Op_LoadUS:
2852
  case Op_LoadI:
2853
  case Op_LoadKlass:
2854
  case Op_LoadNKlass:
2855
  case Op_LoadL:
2856
  case Op_LoadL_unaligned:
2857
  case Op_LoadPLocked:
2858
  case Op_LoadP:
2859
  case Op_LoadN:
2860
  case Op_LoadRange:
2861
  case Op_LoadS: {
2862
  handle_mem:
2863
#ifdef ASSERT
2864
    if( VerifyOptoOopOffsets ) {
2865
      assert( n->is_Mem(), "" );
2866
      MemNode *mem  = (MemNode*)n;
2867
      // Check to see if address types have grounded out somehow.
2868
      const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
2869
      assert( !tp || oop_offset_is_sane(tp), "" );
2870
    }
2871
#endif
2872
    break;
2873
  }
2874

2875
  case Op_AddP: {               // Assert sane base pointers
2876
    Node *addp = n->in(AddPNode::Address);
2877
    assert( !addp->is_AddP() ||
2878
            addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
2879
            addp->in(AddPNode::Base) == n->in(AddPNode::Base),
2880
            "Base pointers must match" );
2881
#ifdef _LP64
2882
    if ((UseCompressedOops || UseCompressedClassPointers) &&
2883
        addp->Opcode() == Op_ConP &&
2884
        addp == n->in(AddPNode::Base) &&
2885
        n->in(AddPNode::Offset)->is_Con()) {
2886
      // Use addressing with narrow klass to load with offset on x86.
2887
      // On sparc loading 32-bits constant and decoding it have less
2888
      // instructions (4) then load 64-bits constant (7).
2889
      // Do this transformation here since IGVN will convert ConN back to ConP.
2890
      const Type* t = addp->bottom_type();
2891
      if (t->isa_oopptr() || t->isa_klassptr()) {
2892
        Node* nn = NULL;
2893

2894
        int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass;
2895

2896
        // Look for existing ConN node of the same exact type.
2897
        Node* r  = root();
2898
        uint cnt = r->outcnt();
2899
        for (uint i = 0; i < cnt; i++) {
2900
          Node* m = r->raw_out(i);
2901
          if (m!= NULL && m->Opcode() == op &&
2902
              m->bottom_type()->make_ptr() == t) {
2903
            nn = m;
2904
            break;
2905
          }
2906
        }
2907
        if (nn != NULL) {
2908
          // Decode a narrow oop to match address
2909
          // [R12 + narrow_oop_reg<<3 + offset]
2910
          if (t->isa_oopptr()) {
2911
            nn = new (this) DecodeNNode(nn, t);
2912
          } else {
2913
            nn = new (this) DecodeNKlassNode(nn, t);
2914
          }
2915
          n->set_req(AddPNode::Base, nn);
2916
          n->set_req(AddPNode::Address, nn);
2917
          if (addp->outcnt() == 0) {
2918
            addp->disconnect_inputs(NULL, this);
2919
          }
2920
        }
2921
      }
2922
    }
2923
#endif
2924
    break;
2925
  }
2926

2927
  case Op_CastPP: {
2928
    // Remove CastPP nodes to gain more freedom during scheduling but
2929
    // keep the dependency they encode as control or precedence edges
2930
    // (if control is set already) on memory operations. Some CastPP
2931
    // nodes don't have a control (don't carry a dependency): skip
2932
    // those.
2933
    if (n->in(0) != NULL) {
2934
      ResourceMark rm;
2935
      Unique_Node_List wq;
2936
      wq.push(n);
2937
      for (uint next = 0; next < wq.size(); ++next) {
2938
        Node *m = wq.at(next);
2939
        for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
2940
          Node* use = m->fast_out(i);
2941
          if (use->is_Mem() || use->is_EncodeNarrowPtr() || use->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
2942
            use->ensure_control_or_add_prec(n->in(0));
2943
          } else if (use->in(0) == NULL) {
2944
            switch(use->Opcode()) {
2945
            case Op_AddP:
2946
            case Op_DecodeN:
2947
            case Op_DecodeNKlass:
2948
            case Op_CheckCastPP:
2949
            case Op_CastPP:
2950
              wq.push(use);
2951
              break;
2952
            }
2953
          }
2954
        }
2955
      }
2956
    }
2957
    const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
2958
    if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
2959
      Node* in1 = n->in(1);
2960
      const Type* t = n->bottom_type();
2961
      Node* new_in1 = in1->clone();
2962
      new_in1->as_DecodeN()->set_type(t);
2963

2964
      if (!Matcher::narrow_oop_use_complex_address()) {
2965
        //
2966
        // x86, ARM and friends can handle 2 adds in addressing mode
2967
        // and Matcher can fold a DecodeN node into address by using
2968
        // a narrow oop directly and do implicit NULL check in address:
2969
        //
2970
        // [R12 + narrow_oop_reg<<3 + offset]
2971
        // NullCheck narrow_oop_reg
2972
        //
2973
        // On other platforms (Sparc) we have to keep new DecodeN node and
2974
        // use it to do implicit NULL check in address:
2975
        //
2976
        // decode_not_null narrow_oop_reg, base_reg
2977
        // [base_reg + offset]
2978
        // NullCheck base_reg
2979
        //
2980
        // Pin the new DecodeN node to non-null path on these platform (Sparc)
2981
        // to keep the information to which NULL check the new DecodeN node
2982
        // corresponds to use it as value in implicit_null_check().
2983
        //
2984
        new_in1->set_req(0, n->in(0));
2985
      }
2986

2987
      n->subsume_by(new_in1, this);
2988
      if (in1->outcnt() == 0) {
2989
        in1->disconnect_inputs(NULL, this);
2990
      }
2991
    } else {
2992
      n->subsume_by(n->in(1), this);
2993
      if (n->outcnt() == 0) {
2994
        n->disconnect_inputs(NULL, this);
2995
      }
2996
    }
2997
    break;
2998
  }
2999
#ifdef _LP64
3000
  case Op_CmpP:
3001
    // Do this transformation here to preserve CmpPNode::sub() and
3002
    // other TypePtr related Ideal optimizations (for example, ptr nullness).
3003
    if (n->in(1)->is_DecodeNarrowPtr() || n->in(2)->is_DecodeNarrowPtr()) {
3004
      Node* in1 = n->in(1);
3005
      Node* in2 = n->in(2);
3006
      if (!in1->is_DecodeNarrowPtr()) {
3007
        in2 = in1;
3008
        in1 = n->in(2);
3009
      }
3010
      assert(in1->is_DecodeNarrowPtr(), "sanity");
3011

3012
      Node* new_in2 = NULL;
3013
      if (in2->is_DecodeNarrowPtr()) {
3014
        assert(in2->Opcode() == in1->Opcode(), "must be same node type");
3015
        new_in2 = in2->in(1);
3016
      } else if (in2->Opcode() == Op_ConP) {
3017
        const Type* t = in2->bottom_type();
3018
        if (t == TypePtr::NULL_PTR) {
3019
          assert(in1->is_DecodeN(), "compare klass to null?");
3020
          // Don't convert CmpP null check into CmpN if compressed
3021
          // oops implicit null check is not generated.
3022
          // This will allow to generate normal oop implicit null check.
3023
          if (Matcher::gen_narrow_oop_implicit_null_checks())
3024
            new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);
3025
          //
3026
          // This transformation together with CastPP transformation above
3027
          // will generated code for implicit NULL checks for compressed oops.
3028
          //
3029
          // The original code after Optimize()
3030
          //
3031
          //    LoadN memory, narrow_oop_reg
3032
          //    decode narrow_oop_reg, base_reg
3033
          //    CmpP base_reg, NULL
3034
          //    CastPP base_reg // NotNull
3035
          //    Load [base_reg + offset], val_reg
3036
          //
3037
          // after these transformations will be
3038
          //
3039
          //    LoadN memory, narrow_oop_reg
3040
          //    CmpN narrow_oop_reg, NULL
3041
          //    decode_not_null narrow_oop_reg, base_reg
3042
          //    Load [base_reg + offset], val_reg
3043
          //
3044
          // and the uncommon path (== NULL) will use narrow_oop_reg directly
3045
          // since narrow oops can be used in debug info now (see the code in
3046
          // final_graph_reshaping_walk()).
3047
          //
3048
          // At the end the code will be matched to
3049
          // on x86:
3050
          //
3051
          //    Load_narrow_oop memory, narrow_oop_reg
3052
          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
3053
          //    NullCheck narrow_oop_reg
3054
          //
3055
          // and on sparc:
3056
          //
3057
          //    Load_narrow_oop memory, narrow_oop_reg
3058
          //    decode_not_null narrow_oop_reg, base_reg
3059
          //    Load [base_reg + offset], val_reg
3060
          //    NullCheck base_reg
3061
          //
3062
        } else if (t->isa_oopptr()) {
3063
          new_in2 = ConNode::make(this, t->make_narrowoop());
3064
        } else if (t->isa_klassptr()) {
3065
          new_in2 = ConNode::make(this, t->make_narrowklass());
3066
        }
3067
      }
3068
      if (new_in2 != NULL) {
3069
        Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2);
3070
        n->subsume_by(cmpN, this);
3071
        if (in1->outcnt() == 0) {
3072
          in1->disconnect_inputs(NULL, this);
3073
        }
3074
        if (in2->outcnt() == 0) {
3075
          in2->disconnect_inputs(NULL, this);
3076
        }
3077
      }
3078
    }
3079
    break;
3080

3081
  case Op_DecodeN:
3082
  case Op_DecodeNKlass:
3083
    assert(!n->in(1)->is_EncodeNarrowPtr(), "should be optimized out");
3084
    // DecodeN could be pinned when it can't be fold into
3085
    // an address expression, see the code for Op_CastPP above.
3086
    assert(n->in(0) == NULL || (UseCompressedOops && !Matcher::narrow_oop_use_complex_address()), "no control");
3087
    break;
3088

3089
  case Op_EncodeP:
3090
  case Op_EncodePKlass: {
3091
    Node* in1 = n->in(1);
3092
    if (in1->is_DecodeNarrowPtr()) {
3093
      n->subsume_by(in1->in(1), this);
3094
    } else if (in1->Opcode() == Op_ConP) {
3095
      const Type* t = in1->bottom_type();
3096
      if (t == TypePtr::NULL_PTR) {
3097
        assert(t->isa_oopptr(), "null klass?");
3098
        n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);
3099
      } else if (t->isa_oopptr()) {
3100
        n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);
3101
      } else if (t->isa_klassptr()) {
3102
        n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);
3103
      }
3104
    }
3105
    if (in1->outcnt() == 0) {
3106
      in1->disconnect_inputs(NULL, this);
3107
    }
3108
    break;
3109
  }
3110

3111
  case Op_Proj: {
3112
    if (OptimizeStringConcat) {
3113
      ProjNode* p = n->as_Proj();
3114
      if (p->_is_io_use) {
3115
        // Separate projections were used for the exception path which
3116
        // are normally removed by a late inline.  If it wasn't inlined
3117
        // then they will hang around and should just be replaced with
3118
        // the original one.
3119
        Node* proj = NULL;
3120
        // Replace with just one
3121
        for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {
3122
          Node *use = i.get();
3123
          if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {
3124
            proj = use;
3125
            break;
3126
          }
3127
        }
3128
        assert(proj != NULL || p->_con == TypeFunc::I_O, "io may be dropped at an infinite loop");
3129
        if (proj != NULL) {
3130
          p->subsume_by(proj, this);
3131
        }
3132
      }
3133
    }
3134
    break;
3135
  }
3136

3137
  case Op_Phi:
3138
    if (n->as_Phi()->bottom_type()->isa_narrowoop() || n->as_Phi()->bottom_type()->isa_narrowklass()) {
3139
      // The EncodeP optimization may create Phi with the same edges
3140
      // for all paths. It is not handled well by Register Allocator.
3141
      Node* unique_in = n->in(1);
3142
      assert(unique_in != NULL, "");
3143
      uint cnt = n->req();
3144
      for (uint i = 2; i < cnt; i++) {
3145
        Node* m = n->in(i);
3146
        assert(m != NULL, "");
3147
        if (unique_in != m)
3148
          unique_in = NULL;
3149
      }
3150
      if (unique_in != NULL) {
3151
        n->subsume_by(unique_in, this);
3152
      }
3153
    }
3154
    break;
3155

3156
#endif
3157

3158
#ifdef ASSERT
3159
  case Op_CastII:
3160
    // Verify that all range check dependent CastII nodes were removed.
3161
    if (n->isa_CastII()->has_range_check()) {
3162
      n->dump(3);
3163
      assert(false, "Range check dependent CastII node was not removed");
3164
    }
3165
    break;
3166
#endif
3167

3168
  case Op_ModI:
3169
    if (UseDivMod) {
3170
      // Check if a%b and a/b both exist
3171
      Node* d = n->find_similar(Op_DivI);
3172
      if (d) {
3173
        // Replace them with a fused divmod if supported
3174
        if (Matcher::has_match_rule(Op_DivModI)) {
3175
          DivModINode* divmod = DivModINode::make(this, n);
3176
          d->subsume_by(divmod->div_proj(), this);
3177
          n->subsume_by(divmod->mod_proj(), this);
3178
        } else {
3179
          // replace a%b with a-((a/b)*b)
3180
          Node* mult = new (this) MulINode(d, d->in(2));
3181
          Node* sub  = new (this) SubINode(d->in(1), mult);
3182
          n->subsume_by(sub, this);
3183
        }
3184
      }
3185
    }
3186
    break;
3187

3188
  case Op_ModL:
3189
    if (UseDivMod) {
3190
      // Check if a%b and a/b both exist
3191
      Node* d = n->find_similar(Op_DivL);
3192
      if (d) {
3193
        // Replace them with a fused divmod if supported
3194
        if (Matcher::has_match_rule(Op_DivModL)) {
3195
          DivModLNode* divmod = DivModLNode::make(this, n);
3196
          d->subsume_by(divmod->div_proj(), this);
3197
          n->subsume_by(divmod->mod_proj(), this);
3198
        } else {
3199
          // replace a%b with a-((a/b)*b)
3200
          Node* mult = new (this) MulLNode(d, d->in(2));
3201
          Node* sub  = new (this) SubLNode(d->in(1), mult);
3202
          n->subsume_by(sub, this);
3203
        }
3204
      }
3205
    }
3206
    break;
3207

3208
  case Op_LoadVector:
3209
  case Op_StoreVector:
3210
    break;
3211

3212
  case Op_PackB:
3213
  case Op_PackS:
3214
  case Op_PackI:
3215
  case Op_PackF:
3216
  case Op_PackL:
3217
  case Op_PackD:
3218
    if (n->req()-1 > 2) {
3219
      // Replace many operand PackNodes with a binary tree for matching
3220
      PackNode* p = (PackNode*) n;
3221
      Node* btp = p->binary_tree_pack(this, 1, n->req());
3222
      n->subsume_by(btp, this);
3223
    }
3224
    break;
3225
  case Op_Loop:
3226
  case Op_CountedLoop:
3227
    if (n->as_Loop()->is_inner_loop()) {
3228
      frc.inc_inner_loop_count();
3229
    }
3230
    break;
3231
  case Op_LShiftI:
3232
  case Op_RShiftI:
3233
  case Op_URShiftI:
3234
  case Op_LShiftL:
3235
  case Op_RShiftL:
3236
  case Op_URShiftL:
3237
    if (Matcher::need_masked_shift_count) {
3238
      // The cpu's shift instructions don't restrict the count to the
3239
      // lower 5/6 bits. We need to do the masking ourselves.
3240
      Node* in2 = n->in(2);
3241
      juint mask = (n->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
3242
      const TypeInt* t = in2->find_int_type();
3243
      if (t != NULL && t->is_con()) {
3244
        juint shift = t->get_con();
3245
        if (shift > mask) { // Unsigned cmp
3246
          n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));
3247
        }
3248
      } else {
3249
        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
3250
          Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask)));
3251
          n->set_req(2, shift);
3252
        }
3253
      }
3254
      if (in2->outcnt() == 0) { // Remove dead node
3255
        in2->disconnect_inputs(NULL, this);
3256
      }
3257
    }
3258
    break;
3259
  case Op_MemBarStoreStore:
3260
  case Op_MemBarRelease:
3261
    // Break the link with AllocateNode: it is no longer useful and
3262
    // confuses register allocation.
3263
    if (n->req() > MemBarNode::Precedent) {
3264
      n->set_req(MemBarNode::Precedent, top());
3265
    }
3266
    break;
3267
  case Op_ShenandoahLoadReferenceBarrier:
3268
    assert(false, "should have been expanded already");
3269
    break;
3270
  default:
3271
    assert( !n->is_Call(), "" );
3272
    assert( !n->is_Mem(), "" );
3273
    assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN");
3274
    break;
3275
  }
3276

3277
  // Collect CFG split points
3278
  if (n->is_MultiBranch())
3279
    frc._tests.push(n);
3280
}
3281

3282
//------------------------------final_graph_reshaping_walk---------------------
3283
// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
3284
// requires that the walk visits a node's inputs before visiting the node.
3285
void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
3286
  ResourceArea *area = Thread::current()->resource_area();
3287
  Unique_Node_List sfpt(area);
3288

3289
  frc._visited.set(root->_idx); // first, mark node as visited
3290
  uint cnt = root->req();
3291
  Node *n = root;
3292
  uint  i = 0;
3293
  while (true) {
3294
    if (i < cnt) {
3295
      // Place all non-visited non-null inputs onto stack
3296
      Node* m = n->in(i);
3297
      ++i;
3298
      if (m != NULL && !frc._visited.test_set(m->_idx)) {
3299
        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {
3300
          // compute worst case interpreter size in case of a deoptimization
3301
          update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());
3302

3303
          sfpt.push(m);
3304
        }
3305
        cnt = m->req();
3306
        nstack.push(n, i); // put on stack parent and next input's index
3307
        n = m;
3308
        i = 0;
3309
      }
3310
    } else {
3311
      // Now do post-visit work
3312
      final_graph_reshaping_impl( n, frc );
3313
      if (nstack.is_empty())
3314
        break;             // finished
3315
      n = nstack.node();   // Get node from stack
3316
      cnt = n->req();
3317
      i = nstack.index();
3318
      nstack.pop();        // Shift to the next node on stack
3319
    }
3320
  }
3321

3322
  // Skip next transformation if compressed oops are not used.
3323
  if ((UseCompressedOops && !Matcher::gen_narrow_oop_implicit_null_checks()) ||
3324
      (!UseCompressedOops && !UseCompressedClassPointers))
3325
    return;
3326

3327
  // Go over safepoints nodes to skip DecodeN/DecodeNKlass nodes for debug edges.
3328
  // It could be done for an uncommon traps or any safepoints/calls
3329
  // if the DecodeN/DecodeNKlass node is referenced only in a debug info.
3330
  while (sfpt.size() > 0) {
3331
    n = sfpt.pop();
3332
    JVMState *jvms = n->as_SafePoint()->jvms();
3333
    assert(jvms != NULL, "sanity");
3334
    int start = jvms->debug_start();
3335
    int end   = n->req();
3336
    bool is_uncommon = (n->is_CallStaticJava() &&
3337
                        n->as_CallStaticJava()->uncommon_trap_request() != 0);
3338
    for (int j = start; j < end; j++) {
3339
      Node* in = n->in(j);
3340
      if (in->is_DecodeNarrowPtr()) {
3341
        bool safe_to_skip = true;
3342
        if (!is_uncommon ) {
3343
          // Is it safe to skip?
3344
          for (uint i = 0; i < in->outcnt(); i++) {
3345
            Node* u = in->raw_out(i);
3346
            if (!u->is_SafePoint() ||
3347
                 u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
3348
              safe_to_skip = false;
3349
            }
3350
          }
3351
        }
3352
        if (safe_to_skip) {
3353
          n->set_req(j, in->in(1));
3354
        }
3355
        if (in->outcnt() == 0) {
3356
          in->disconnect_inputs(NULL, this);
3357
        }
3358
      }
3359
    }
3360
  }
3361
}
3362

3363
//------------------------------final_graph_reshaping--------------------------
// Final Graph Reshaping.
//
// (1) Clone simple inputs to uncommon calls, so they can be scheduled late
//     and not commoned up and forced early.  Must come after regular
//     optimizations to avoid GVN undoing the cloning.  Clone constant
//     inputs to Loop Phis; these will be split by the allocator anyways.
//     Remove Opaque nodes.
// (2) Move last-uses by commutative operations to the left input to encourage
//     Intel update-in-place two-address operations and better register usage
//     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
//     calls canonicalizing them back.
// (3) Count the number of double-precision FP ops, single-precision FP ops
//     and call sites.  On Intel, we can get correct rounding either by
//     forcing singles to memory (requires extra stores and loads after each
//     FP bytecode) or we can set a rounding mode bit (requires setting and
//     clearing the mode bit around call sites).  The mode bit is only used
//     if the relative frequency of single FP ops to calls is low enough.
//     This is a key transform for SPEC mpeg_audio.
// (4) Detect infinite loops; blobs of code reachable from above but not
//     below.  Several of the Code_Gen algorithms fail on such code shapes,
//     so we simply bail out.  Happens a lot in ZKM.jar, but also happens
//     from time to time in other codes (such as -Xcomp finalizer loops, etc).
//     Detection is by looking for IfNodes where only 1 projection is
//     reachable from below or CatchNodes missing some targets.
// (5) Assert for insane oop offsets in debug mode.
3389

3390
bool Compile::final_graph_reshaping() {
3391
  // an infinite loop may have been eliminated by the optimizer,
3392
  // in which case the graph will be empty.
3393
  if (root()->req() == 1) {
3394
    record_method_not_compilable("trivial infinite loop");
3395
    return true;
3396
  }
3397

3398
  // Expensive nodes have their control input set to prevent the GVN
3399
  // from freely commoning them. There's no GVN beyond this point so
3400
  // no need to keep the control input. We want the expensive nodes to
3401
  // be freely moved to the least frequent code path by gcm.
3402
  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
3403
  for (int i = 0; i < expensive_count(); i++) {
3404
    _expensive_nodes->at(i)->set_req(0, NULL);
3405
  }
3406

3407
  Final_Reshape_Counts frc;
3408

3409
  // Visit everybody reachable!
3410
  // Allocate stack of size C->live_nodes()/2 to avoid frequent realloc
3411
  Node_Stack nstack(live_nodes() >> 1);
3412
  final_graph_reshaping_walk(nstack, root(), frc);
3413

3414
  // Check for unreachable (from below) code (i.e., infinite loops).
3415
  for( uint i = 0; i < frc._tests.size(); i++ ) {
3416
    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
3417
    // Get number of CFG targets.
3418
    // Note that PCTables include exception targets after calls.
3419
    uint required_outcnt = n->required_outcnt();
3420
    if (n->outcnt() != required_outcnt) {
3421
      // Check for a few special cases.  Rethrow Nodes never take the
3422
      // 'fall-thru' path, so expected kids is 1 less.
3423
      if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
3424
        if (n->in(0)->in(0)->is_Call()) {
3425
          CallNode *call = n->in(0)->in(0)->as_Call();
3426
          if (call->entry_point() == OptoRuntime::rethrow_stub()) {
3427
            required_outcnt--;      // Rethrow always has 1 less kid
3428
          } else if (call->req() > TypeFunc::Parms &&
3429
                     call->is_CallDynamicJava()) {
3430
            // Check for null receiver. In such case, the optimizer has
3431
            // detected that the virtual call will always result in a null
3432
            // pointer exception. The fall-through projection of this CatchNode
3433
            // will not be populated.
3434
            Node *arg0 = call->in(TypeFunc::Parms);
3435
            if (arg0->is_Type() &&
3436
                arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
3437
              required_outcnt--;
3438
            }
3439
          } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
3440
                     call->req() > TypeFunc::Parms+1 &&
3441
                     call->is_CallStaticJava()) {
3442
            // Check for negative array length. In such case, the optimizer has
3443
            // detected that the allocation attempt will always result in an
3444
            // exception. There is no fall-through projection of this CatchNode .
3445
            Node *arg1 = call->in(TypeFunc::Parms+1);
3446
            if (arg1->is_Type() &&
3447
                arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
3448
              required_outcnt--;
3449
            }
3450
          }
3451
        }
3452
      }
3453
      // Recheck with a better notion of 'required_outcnt'
3454
      if (n->outcnt() != required_outcnt) {
3455
        record_method_not_compilable("malformed control flow");
3456
        return true;            // Not all targets reachable!
3457
      }
3458
    }
3459
    // Check that I actually visited all kids.  Unreached kids
3460
    // must be infinite loops.
3461
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
3462
      if (!frc._visited.test(n->fast_out(j)->_idx)) {
3463
        record_method_not_compilable("infinite loop");
3464
        return true;            // Found unvisited kid; must be unreach
3465
      }
3466
  }
3467

3468
  // If original bytecodes contained a mixture of floats and doubles
3469
  // check if the optimizer has made it homogenous, item (3).
3470
  if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
3471
      frc.get_float_count() > 32 &&
3472
      frc.get_double_count() == 0 &&
3473
      (10 * frc.get_call_count() < frc.get_float_count()) ) {
3474
    set_24_bit_selection_and_mode( false,  true );
3475
  }
3476

3477
  set_java_calls(frc.get_java_call_count());
3478
  set_inner_loops(frc.get_inner_loop_count());
3479

3480
  // No infinite loops, no reason to bail out.
3481
  return false;
3482
}
3483

3484
//-----------------------------too_many_traps----------------------------------
3485
// Report if there are too many traps at the current method and bci.
3486
// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
3487
bool Compile::too_many_traps(ciMethod* method,
3488
                             int bci,
3489
                             Deoptimization::DeoptReason reason) {
3490
  ciMethodData* md = method->method_data();
3491
  if (md->is_empty()) {
3492
    // Assume the trap has not occurred, or that it occurred only
3493
    // because of a transient condition during start-up in the interpreter.
3494
    return false;
3495
  }
3496
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
3497
  if (md->has_trap_at(bci, m, reason) != 0) {
3498
    // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
3499
    // Also, if there are multiple reasons, or if there is no per-BCI record,
3500
    // assume the worst.
3501
    if (log())
3502
      log()->elem("observe trap='%s' count='%d'",
3503
                  Deoptimization::trap_reason_name(reason),
3504
                  md->trap_count(reason));
3505
    return true;
3506
  } else {
3507
    // Ignore method/bci and see if there have been too many globally.
3508
    return too_many_traps(reason, md);
3509
  }
3510
}
3511

3512
// Less-accurate variant which does not require a method and bci.
3513
bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
3514
                             ciMethodData* logmd) {
3515
  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
3516
    // Too many traps globally.
3517
    // Note that we use cumulative trap_count, not just md->trap_count.
3518
    if (log()) {
3519
      int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
3520
      log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
3521
                  Deoptimization::trap_reason_name(reason),
3522
                  mcount, trap_count(reason));
3523
    }
3524
    return true;
3525
  } else {
3526
    // The coast is clear.
3527
    return false;
3528
  }
3529
}
3530

3531
//--------------------------too_many_recompiles--------------------------------
3532
// Report if there are too many recompiles at the current method and bci.
3533
// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
3534
// Is not eager to return true, since this will cause the compiler to use
3535
// Action_none for a trap point, to avoid too many recompilations.
3536
bool Compile::too_many_recompiles(ciMethod* method,
3537
                                  int bci,
3538
                                  Deoptimization::DeoptReason reason) {
3539
  ciMethodData* md = method->method_data();
3540
  if (md->is_empty()) {
3541
    // Assume the trap has not occurred, or that it occurred only
3542
    // because of a transient condition during start-up in the interpreter.
3543
    return false;
3544
  }
3545
  // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
3546
  uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
3547
  uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
3548
  Deoptimization::DeoptReason per_bc_reason
3549
    = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
3550
  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
3551
  if ((per_bc_reason == Deoptimization::Reason_none
3552
       || md->has_trap_at(bci, m, reason) != 0)
3553
      // The trap frequency measure we care about is the recompile count:
3554
      && md->trap_recompiled_at(bci, m)
3555
      && md->overflow_recompile_count() >= bc_cutoff) {
3556
    // Do not emit a trap here if it has already caused recompilations.
3557
    // Also, if there are multiple reasons, or if there is no per-BCI record,
3558
    // assume the worst.
3559
    if (log())
3560
      log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
3561
                  Deoptimization::trap_reason_name(reason),
3562
                  md->trap_count(reason),
3563
                  md->overflow_recompile_count());
3564
    return true;
3565
  } else if (trap_count(reason) != 0
3566
             && decompile_count() >= m_cutoff) {
3567
    // Too many recompiles globally, and we have seen this sort of trap.
3568
    // Use cumulative decompile_count, not just md->decompile_count.
3569
    if (log())
3570
      log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
3571
                  Deoptimization::trap_reason_name(reason),
3572
                  md->trap_count(reason), trap_count(reason),
3573
                  md->decompile_count(), decompile_count());
3574
    return true;
3575
  } else {
3576
    // The coast is clear.
3577
    return false;
3578
  }
3579
}
3580

3581
// Compute when not to trap. Used by matching trap based nodes and
3582
// NullCheck optimization.
3583
void Compile::set_allowed_deopt_reasons() {
3584
  _allowed_reasons = 0;
3585
  if (is_method_compilation()) {
3586
    for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) {
3587
      assert(rs < BitsPerInt, "recode bit map");
3588
      if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
3589
        _allowed_reasons |= nth_bit(rs);
3590
      }
3591
    }
3592
  }
3593
}
3594

3595
#ifndef PRODUCT
3596
//------------------------------verify_graph_edges---------------------------
3597
// Walk the Graph and verify that there is a one-to-one correspondence
3598
// between Use-Def edges and Def-Use edges in the graph.
3599
void Compile::verify_graph_edges(bool no_dead_code) {
3600
  if (VerifyGraphEdges) {
3601
    ResourceArea *area = Thread::current()->resource_area();
3602
    Unique_Node_List visited(area);
3603
    // Call recursive graph walk to check edges
3604
    _root->verify_edges(visited);
3605
    if (no_dead_code) {
3606
      // Now make sure that no visited node is used by an unvisited node.
3607
      bool dead_nodes = false;
3608
      Unique_Node_List checked(area);
3609
      while (visited.size() > 0) {
3610
        Node* n = visited.pop();
3611
        checked.push(n);
3612
        for (uint i = 0; i < n->outcnt(); i++) {
3613
          Node* use = n->raw_out(i);
3614
          if (checked.member(use))  continue;  // already checked
3615
          if (visited.member(use))  continue;  // already in the graph
3616
          if (use->is_Con())        continue;  // a dead ConNode is OK
3617
          // At this point, we have found a dead node which is DU-reachable.
3618
          if (!dead_nodes) {
3619
            tty->print_cr("*** Dead nodes reachable via DU edges:");
3620
            dead_nodes = true;
3621
          }
3622
          use->dump(2);
3623
          tty->print_cr("---");
3624
          checked.push(use);  // No repeats; pretend it is now checked.
3625
        }
3626
      }
3627
      assert(!dead_nodes, "using nodes must be reachable from root");
3628
    }
3629
  }
3630
}
3631

3632
// Verify GC barriers consistency
3633
// Currently supported:
3634
// - G1 pre-barriers (see GraphKit::g1_write_barrier_pre())
3635
void Compile::verify_barriers() {
3636
  if (UseG1GC || UseShenandoahGC) {
3637
    // Verify G1 pre-barriers
3638
    const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active());
3639

3640
    ResourceArea *area = Thread::current()->resource_area();
3641
    Unique_Node_List visited(area);
3642
    Node_List worklist(area);
3643
    // We're going to walk control flow backwards starting from the Root
3644
    worklist.push(_root);
3645
    while (worklist.size() > 0) {
3646
      Node* x = worklist.pop();
3647
      if (x == NULL || x == top()) continue;
3648
      if (visited.member(x)) {
3649
        continue;
3650
      } else {
3651
        visited.push(x);
3652
      }
3653

3654
      if (x->is_Region()) {
3655
        for (uint i = 1; i < x->req(); i++) {
3656
          worklist.push(x->in(i));
3657
        }
3658
      } else {
3659
        worklist.push(x->in(0));
3660
        // We are looking for the pattern:
3661
        //                            /->ThreadLocal
3662
        // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
3663
        //              \->ConI(0)
3664
        // We want to verify that the If and the LoadB have the same control
3665
        // See GraphKit::g1_write_barrier_pre()
3666
        if (x->is_If()) {
3667
          IfNode *iff = x->as_If();
3668
          if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
3669
            CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
3670
            if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
3671
                && cmp->in(1)->is_Load()) {
3672
              LoadNode* load = cmp->in(1)->as_Load();
3673
              if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
3674
                  && load->in(2)->in(3)->is_Con()
3675
                  && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {
3676

3677
                Node* if_ctrl = iff->in(0);
3678
                Node* load_ctrl = load->in(0);
3679

3680
                if (if_ctrl != load_ctrl) {
3681
                  // Skip possible CProj->NeverBranch in infinite loops
3682
                  if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
3683
                      && (if_ctrl->in(0)->is_MultiBranch() && if_ctrl->in(0)->Opcode() == Op_NeverBranch)) {
3684
                    if_ctrl = if_ctrl->in(0)->in(0);
3685
                  }
3686
                }
3687
                assert(load_ctrl != NULL && if_ctrl == load_ctrl, "controls must match");
3688
              }
3689
            }
3690
          }
3691
        }
3692
      }
3693
    }
3694
  }
3695
}
3696

3697
#endif
3698

3699
// The Compile object keeps track of failure reasons separately from the ciEnv.
3700
// This is required because there is not quite a 1-1 relation between the
3701
// ciEnv and its compilation task and the Compile object.  Note that one
3702
// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
3703
// to backtrack and retry without subsuming loads.  Other than this backtracking
3704
// behavior, the Compile's failure reason is quietly copied up to the ciEnv
3705
// by the logic in C2Compiler.
3706
void Compile::record_failure(const char* reason) {
3707
  if (log() != NULL) {
3708
    log()->elem("failure reason='%s' phase='compile'", reason);
3709
  }
3710
  if (_failure_reason == NULL) {
3711
    // Record the first failure reason.
3712
    _failure_reason = reason;
3713
  }
3714

3715
  if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
3716
    C->print_method(PHASE_FAILURE);
3717
  }
3718
  _root = NULL;  // flush the graph, too
3719
}
3720

3721
// Starts timing a compiler phase.  When 'dolog' is set and the current
// compilation has a log, a "phase" head element with the node counts is
// written to it.
Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
    _phase_name(name), _dolog(dolog)
{
  // The current compilation is only looked up when logging was requested.
  C    = dolog ? Compile::current() : NULL;
  _log = dolog ? C->log() : NULL;
  if (_log == NULL) {
    return;
  }
  _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
  _log->stamp();
  _log->end_head();
}
3738

3739
// Ends the phase: re-reads the current compilation and its log, prints the
// optional node-count diagnostics in ASSERT builds, and writes the "phase"
// done element when logging is active.
Compile::TracePhase::~TracePhase() {

  C    = Compile::current();
  _log = _dolog ? C->log() : NULL;

#ifdef ASSERT
  if (PrintIdealNodeCount) {
    tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'",
                  _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk());
  }

  if (VerifyIdealNodeCount) {
    Compile::current()->print_missing_nodes();
  }
#endif

  if (_log == NULL) {
    return;
  }
  _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
}
3763

3764
//=============================================================================
3765
// Two Constant's are equal when the type and the value are equal.
3766
bool Compile::Constant::operator==(const Constant& other) {
3767
  if (type()          != other.type()         )  return false;
3768
  if (can_be_reused() != other.can_be_reused())  return false;
3769
  // For floating point values we compare the bit pattern.
3770
  switch (type()) {
3771
  case T_FLOAT:   return (_v._value.i == other._v._value.i);
3772
  case T_LONG:
3773
  case T_DOUBLE:  return (_v._value.j == other._v._value.j);
3774
  case T_OBJECT:
3775
  case T_ADDRESS: return (_v._value.l == other._v._value.l);
3776
  case T_VOID:    return (_v._value.l == other._v._value.l);  // jump-table entries
3777
  case T_METADATA: return (_v._metadata == other._v._metadata);
3778
  default: ShouldNotReachHere();
3779
  }
3780
  return false;
3781
}
3782

3783
// Size in bytes that a constant of basic type 't' takes in the constant
// table.  Types without an entry here must not be passed in.
static int type_to_size_in_bytes(BasicType t) {
  switch (t) {
  case T_FLOAT:
    return sizeof(jfloat );
  case T_LONG:
    return sizeof(jlong  );
  case T_DOUBLE:
    return sizeof(jdouble);
  case T_METADATA:
    return sizeof(Metadata*);
  case T_OBJECT:
  case T_ADDRESS:
  case T_VOID:
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
    return sizeof(jobject);
  default:
    break;
  }

  ShouldNotReachHere();
  return -1;
}
3799

3800
// Comparator that orders constants by descending frequency: returns -1 when
// 'a' is more frequent than 'b', 1 when it is less frequent and 0 otherwise.
int Compile::ConstantTable::qsort_comparator(Constant* a, Constant* b) {
  return (a->freq() > b->freq()) ? -1
       : (a->freq() < b->freq()) ?  1
       :                            0;
}
3806

3807
void Compile::ConstantTable::calculate_offsets_and_size() {
3808
  // First, sort the array by frequencies.
3809
  _constants.sort(qsort_comparator);
3810

3811
#ifdef ASSERT
3812
  // Make sure all jump-table entries were sorted to the end of the
3813
  // array (they have a negative frequency).
3814
  bool found_void = false;
3815
  for (int i = 0; i < _constants.length(); i++) {
3816
    Constant con = _constants.at(i);
3817
    if (con.type() == T_VOID)
3818
      found_void = true;  // jump-tables
3819
    else
3820
      assert(!found_void, "wrong sorting");
3821
  }
3822
#endif
3823

3824
  int offset = 0;
3825
  for (int i = 0; i < _constants.length(); i++) {
3826
    Constant* con = _constants.adr_at(i);
3827

3828
    // Align offset for type.
3829
    int typesize = type_to_size_in_bytes(con->type());
3830
    offset = align_size_up(offset, typesize);
3831
    con->set_offset(offset);   // set constant's offset
3832

3833
    if (con->type() == T_VOID) {
3834
      MachConstantNode* n = (MachConstantNode*) con->get_jobject();
3835
      offset = offset + typesize * n->outcnt();  // expand jump-table
3836
    } else {
3837
      offset = offset + typesize;
3838
    }
3839
  }
3840

3841
  // Align size up to the next section start (which is insts; see
3842
  // CodeBuffer::align_at_start).
3843
  assert(_size == -1, "already set?");
3844
  _size = align_size_up(offset, CodeEntryAlignment);
3845
}
3846

3847
// Emits every constant of the table into 'cb' in table order and asserts
// that each one ends up at the offset stored in the constant.
void Compile::ConstantTable::emit(CodeBuffer& cb) {
  MacroAssembler _masm(&cb);
  for (int idx = 0; idx < _constants.length(); idx++) {
    Constant con = _constants.at(idx);
    address constant_addr = NULL;
    switch (con.type()) {
    case T_LONG:
      constant_addr = _masm.long_constant(con.get_jlong());
      break;
    case T_FLOAT:
      constant_addr = _masm.float_constant(con.get_jfloat());
      break;
    case T_DOUBLE:
      constant_addr = _masm.double_constant(con.get_jdouble());
      break;
    case T_OBJECT: {
      // Oops are emitted together with an oop relocation.
      jobject obj = con.get_jobject();
      int oop_index = _masm.oop_recorder()->find_index(obj);
      constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
      break;
    }
    case T_ADDRESS: {
      address raw_addr = (address) con.get_jobject();
      constant_addr = _masm.address_constant(raw_addr);
      break;
    }
    // We use T_VOID as marker for jump-table entries (labels) which
    // need an internal word relocation.
    case T_VOID: {
      MachConstantNode* table_node = (MachConstantNode*) con.get_jobject();
      // Fill the jump-table with a dummy word.  The real value is
      // filled in later in fill_jump_table.
      address dummy = (address) table_node;
      constant_addr = _masm.address_constant(dummy);
      // Expand jump-table
      for (uint slot = 1; slot < table_node->outcnt(); slot++) {
        address temp_addr = _masm.address_constant(dummy + slot);
        assert(temp_addr, "consts section too small");
      }
      break;
    }
    case T_METADATA: {
      // Metadata is emitted together with a metadata relocation.
      Metadata* md = con.get_metadata();
      int metadata_index = _masm.oop_recorder()->find_index(md);
      constant_addr = _masm.address_constant((address) md, metadata_Relocation::spec(metadata_index));
      break;
    }
    default:
      ShouldNotReachHere();
    }
    assert(constant_addr, "consts section too small");
    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
            err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
  }
}
3895

3896
// Return the offset recorded for a constant that is already in the table.
// Asserts that the constant is present and that its recorded offset is
// not -1 (an offset of -1 is treated as "table not emitted yet").
int Compile::ConstantTable::find_offset(Constant& con) const {
  const int index = _constants.find(con);
  assert(index != -1, "constant must be in constant table");

  const int result = _constants.at(index).offset();
  assert(result != -1, "constant table not emitted yet?");

  return result;
}
3903

3904
// Add a constant to the table.  When both the incoming constant and an
// equal entry already in the table are reusable, no new entry is appended;
// the existing entry's frequency is increased by the incoming frequency.
void Compile::ConstantTable::add(Constant& con) {
  // Only look for an existing entry when the new constant may be shared.
  const int existing = con.can_be_reused() ? _constants.find(con) : -1;
  if (existing != -1) {
    Constant* entry = _constants.adr_at(existing);
    if (entry->can_be_reused()) {
      entry->inc_freq(con.freq());  // fold the new frequency into the shared entry
      return;
    }
  }
  (void) _constants.append(con);
}
3914

3915
// Build a constant of the given type/value, using the frequency of the
// block that holds node n, add it to the table and return it.
Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
  Block* holder = Compile::current()->cfg()->get_block_for_node(n);
  Constant entry(type, value, holder->_freq);
  add(entry);
  return entry;
}
3921

3922
// Wrap a Metadata pointer in a Constant, add it to the table and return it.
Compile::Constant Compile::ConstantTable::add(Metadata* metadata) {
  Constant entry(metadata);
  add(entry);
  return entry;
}
3927

3928
// Add the constant carried by a machine operand.  The operand's basic type
// selects which jvalue member is filled; metadata operands are routed to
// the Metadata* overload.  Any other type trips a guarantee.
Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, MachOper* oper) {
  const BasicType type = oper->type()->basic_type();

  // Metadata constants do not go through a jvalue.
  if (type == T_METADATA) {
    return add((Metadata*)oper->constant());
  }

  jvalue payload;
  switch (type) {
  case T_LONG:
    payload.j = oper->constantL();
    break;
  case T_FLOAT:
    payload.f = oper->constantF();
    break;
  case T_DOUBLE:
    payload.d = oper->constantD();
    break;
  case T_OBJECT:   // fall through: objects and raw addresses share the 'l' slot
  case T_ADDRESS:
    payload.l = (jobject) oper->constant();
    break;
  default:
    guarantee(false, err_msg_res("unhandled type: %s", type2name(type)));
  }
  return add(n, type, payload);
}
3942

3943
// Register a jump-table for node n as a T_VOID constant and return it.
Compile::Constant Compile::ConstantTable::add_jump_table(MachConstantNode* n) {
  // The node pointer itself identifies the jump-table: this method is
  // called from Compile::Fill_buffer right before the MachNodes are
  // emitted and the jump-table is filled, so the MachNode pointers do
  // not change anymore.
  jvalue key;
  key.l = (jobject) n;

  // Labels of a jump-table cannot be reused, hence the trailing 'false'.
  Constant entry(T_VOID, key, next_jump_table_freq(), false);
  add(entry);
  return entry;
}
3954

3955
// Overwrite the placeholder entries of n's jump-table in the constant
// section of cb with the targets of the given labels (one label per
// out-edge of n), and record an internal-word relocation for each entry.
void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const {
  // Nothing to do when called from Compile::scratch_emit_size.
  if (Compile::current()->in_scratch_emit_size()) {
    return;
  }

  assert(labels.is_nonempty(), "must be");
  assert((uint) labels.length() == n->outcnt(), err_msg_res("must be equal: %d == %d", labels.length(), n->outcnt()));

  // MachConstantNode::constant_offset() already includes
  // table_base_offset(); subtract it again to obtain the plain offset
  // into the constant table.
  const int plain_offset = n->constant_offset() - table_base_offset();

  MacroAssembler _masm(&cb);
  address* table = (address*) (_masm.code()->consts()->start() + plain_offset);

  for (uint slot_no = 0; slot_no < n->outcnt(); slot_no++) {
    address* slot = table + slot_no;
    // Each slot is expected to still hold its placeholder (node pointer + index).
    assert(*slot == (((address) n) + slot_no), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*slot), p2i(((address) n) + slot_no)));
    *slot = cb.consts()->target(*labels.at(slot_no), (address) slot);
    cb.consts()->relocate((address) slot, relocInfo::internal_word_type);
  }
}
3977

3978
void Compile::dump_inlining() {
3979
  if (print_inlining() || print_intrinsics()) {
3980
    // Print inlining message for candidates that we couldn't inline
3981
    // for lack of space or non constant receiver
3982
    for (int i = 0; i < _late_inlines.length(); i++) {
3983
      CallGenerator* cg = _late_inlines.at(i);
3984
      cg->print_inlining_late("live nodes > LiveNodeCountInliningCutoff");
3985
    }
3986
    Unique_Node_List useful;
3987
    useful.push(root());
3988
    for (uint next = 0; next < useful.size(); ++next) {
3989
      Node* n  = useful.at(next);
3990
      if (n->is_Call() && n->as_Call()->generator() != NULL && n->as_Call()->generator()->call_node() == n) {
3991
        CallNode* call = n->as_Call();
3992
        CallGenerator* cg = call->generator();
3993
        cg->print_inlining_late("receiver not constant");
3994
      }
3995
      uint max = n->len();
3996
      for ( uint i = 0; i < max; ++i ) {
3997
        Node *m = n->in(i);
3998
        if ( m == NULL ) continue;
3999
        useful.push(m);
4000
      }
4001
    }
4002
    for (int i = 0; i < _print_inlining_list->length(); i++) {
4003
      tty->print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());
4004
    }
4005
  }
4006
}
4007

4008
// Dump inlining replay data to the stream.
4009
// Don't change thread state and acquire any locks.
4010
void Compile::dump_inline_data(outputStream* out) {
4011
  InlineTree* inl_tree = ilt();
4012
  if (inl_tree != NULL) {
4013
    out->print(" inline %d", inl_tree->count());
4014
    inl_tree->dump_replay_data(out);
4015
  }
4016
}
4017

4018
// Three-way comparison of two nodes: ordered first by opcode, then by the
// addresses of their required inputs 1..req()-1.  Returns -1, 0 or 1.
// Input 0 does not take part in the comparison.
int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
  const int op1 = n1->Opcode();
  const int op2 = n2->Opcode();
  if (op1 != op2) {
    return (op1 < op2) ? -1 : 1;
  }

  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
  for (uint idx = 1; idx < n1->req(); idx++) {
    Node* lhs = n1->in(idx);
    Node* rhs = n2->in(idx);
    if (lhs != rhs) {
      return (lhs < rhs) ? -1 : 1;
    }
  }

  return 0;
}
4030

4031
// Adapter taking pointers to list elements; dereferences both and
// forwards to the Node* comparison.
int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
  return cmp_expensive_nodes(*n1p, *n2p);
}
4037

4038
void Compile::sort_expensive_nodes() {
4039
  if (!expensive_nodes_sorted()) {
4040
    _expensive_nodes->sort(cmp_expensive_nodes);
4041
  }
4042
}
4043

4044
bool Compile::expensive_nodes_sorted() const {
4045
  for (int i = 1; i < _expensive_nodes->length(); i++) {
4046
    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
4047
      return false;
4048
    }
4049
  }
4050
  return true;
4051
}
4052

4053
// Prune unreachable nodes from the expensive-node list, sort it, and
// report whether at least two neighbouring entries compare equal under
// cmp_expensive_nodes (same opcode and same data inputs).
// Returns false right away for an empty list.
bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
  if (_expensive_nodes->length() == 0) {
    return false;
  }

  assert(OptimizeExpensiveOps, "optimization off?");

  // Take this opportunity to remove dead nodes from the list: compact the
  // survivors to the front, then cut off the tail.
  int live = 0;
  for (int scan = 0; scan < _expensive_nodes->length(); scan++) {
    Node* cand = _expensive_nodes->at(scan);
    if (cand->is_unreachable(igvn)) {
      continue;
    }
    assert(cand->is_expensive(), "should be expensive");
    _expensive_nodes->at_put(live++, cand);
  }
  _expensive_nodes->trunc_to(live);

  // Sorting puts similar nodes next to each other, so a pair of identical
  // kind with the same data inputs shows up as adjacent equal entries.
  sort_expensive_nodes();

  for (int left = 0, right = 1; right < _expensive_nodes->length(); left++, right++) {
    if (cmp_expensive_nodes(_expensive_nodes->adr_at(left), _expensive_nodes->adr_at(right)) == 0) {
      return true;
    }
  }

  return false;
}
4085

4086
// Sort the expensive-node list and keep only nodes that share their opcode
// with a neighbour.  A node whose opcode occurs only once gets its control
// input (input 0) cleared and is dropped from the list.
void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
  if (_expensive_nodes->length() == 0) {
    return;
  }

  assert(OptimizeExpensiveOps, "optimization off?");

  // Sort to bring similar nodes next to each other and clear the
  // control input of nodes for which there's only a single copy.
  sort_expensive_nodes();

  const int last = _expensive_nodes->length() - 1;
  int kept = 0;   // next write position for nodes that stay in the list
  int run  = 0;   // number of preceding nodes with the same opcode as the current one
  int cur  = 0;
  for (; cur < last; cur++) {
    assert(kept <= cur, "can't write beyond current index");
    Node* node = _expensive_nodes->at(cur);
    const bool same_as_next = (node->Opcode() == _expensive_nodes->at(cur + 1)->Opcode());
    if (same_as_next) {
      run++;
      _expensive_nodes->at_put(kept++, node);
    } else if (run > 0) {
      // Last member of a run of equal opcodes: keep it and start over.
      _expensive_nodes->at_put(kept++, node);
      run = 0;
    } else {
      // Single copy: take it out of the hash table while its control
      // input is cleared, then put it back.
      igvn.hash_delete(node);
      node->set_req(0, NULL);
      igvn.hash_insert(node);
    }
  }

  // The loop above stops at the final element; handle it the same way.
  if (run > 0) {
    _expensive_nodes->at_put(kept++, _expensive_nodes->at(cur));
  } else if (_expensive_nodes->length() >= 1) {
    Node* node = _expensive_nodes->at(cur);
    igvn.hash_delete(node);
    node->set_req(0, NULL);
    igvn.hash_insert(node);
  }
  _expensive_nodes->trunc_to(kept);
}
4127

4128
// Record n as an expensive node.  With OptimizeExpensiveOps enabled the
// node is appended to the expensive-node list; otherwise its control
// input is cleared instead.
void Compile::add_expensive_node(Node * n) {
  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
  assert(n->is_expensive(), "expensive nodes with non-null control here only");
  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");

  if (!OptimizeExpensiveOps) {
    // Clear control input and let IGVN optimize expensive nodes if
    // OptimizeExpensiveOps is off.
    n->set_req(0, NULL);
    return;
  }
  _expensive_nodes->append(n);
}
4140

4141
/**
4142
 * Remove the speculative part of types and clean up the graph
4143
 */
4144
void Compile::remove_speculative_types(PhaseIterGVN &igvn) {
4145
  if (UseTypeSpeculation) {
4146
    Unique_Node_List worklist;
4147
    worklist.push(root());
4148
    int modified = 0;
4149
    // Go over all type nodes that carry a speculative type, drop the
4150
    // speculative part of the type and enqueue the node for an igvn
4151
    // which may optimize it out.
4152
    for (uint next = 0; next < worklist.size(); ++next) {
4153
      Node *n  = worklist.at(next);
4154
      if (n->is_Type()) {
4155
        TypeNode* tn = n->as_Type();
4156
        const Type* t = tn->type();
4157
        const Type* t_no_spec = t->remove_speculative();
4158
        if (t_no_spec != t) {
4159
          bool in_hash = igvn.hash_delete(n);
4160
          assert(in_hash || n->hash() == Node::NO_HASH, "node should be in igvn hash table");
4161
          tn->set_type(t_no_spec);
4162
          igvn.hash_insert(n);
4163
          igvn._worklist.push(n); // give it a chance to go away
4164
          modified++;
4165
        }
4166
      }
4167
      uint max = n->len();
4168
      for( uint i = 0; i < max; ++i ) {
4169
        Node *m = n->in(i);
4170
        if (not_a_node(m))  continue;
4171
        worklist.push(m);
4172
      }
4173
    }
4174
    // Drop the speculative part of all types in the igvn's type table
4175
    igvn.remove_speculative_types();
4176
    if (modified > 0) {
4177
      igvn.optimize();
4178
    }
4179
#ifdef ASSERT
4180
    // Verify that after the IGVN is over no speculative type has resurfaced
4181
    worklist.clear();
4182
    worklist.push(root());
4183
    for (uint next = 0; next < worklist.size(); ++next) {
4184
      Node *n  = worklist.at(next);
4185
      const Type* t = igvn.type_or_null(n);
4186
      assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");
4187
      if (n->is_Type()) {
4188
        t = n->as_Type()->type();
4189
        assert(t == t->remove_speculative(), "no more speculative types");
4190
      }
4191
      uint max = n->len();
4192
      for( uint i = 0; i < max; ++i ) {
4193
        Node *m = n->in(i);
4194
        if (not_a_node(m))  continue;
4195
        worklist.push(m);
4196
      }
4197
    }
4198
    igvn.check_no_speculative_types();
4199
#endif
4200
  }
4201
}
4202

4203
// Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)
4204
Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl) {
4205
  if (ctrl != NULL) {
4206
    // Express control dependency by a CastII node with a narrow type.
4207
    value = new (phase->C) CastIINode(value, itype, false, true /* range check dependency */);
4208
    // Make the CastII node dependent on the control input to prevent the narrowed ConvI2L
4209
    // node from floating above the range check during loop optimizations. Otherwise, the
4210
    // ConvI2L node may be eliminated independently of the range check, causing the data path
4211
    // to become TOP while the control path is still there (although it's unreachable).
4212
    value->set_req(0, ctrl);
4213
    // Save CastII node to remove it after loop optimizations.
4214
    phase->C->add_range_check_cast(value);
4215
    value = phase->transform(value);
4216
  }
4217
  const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);
4218
  return phase->transform(new (phase->C) ConvI2LNode(value, ltype));
4219
}
4220

4221
// Auxiliary method to support randomized stressing/fuzzing.
4222
//
4223
// This method can be called the arbitrary number of times, with current count
4224
// as the argument. The logic allows selecting a single candidate from the
4225
// running list of candidates as follows:
4226
//    int count = 0;
4227
//    Cand* selected = null;
4228
//    while(cand = cand->next()) {
4229
//      if (randomized_select(++count)) {
4230
//        selected = cand;
4231
//      }
4232
//    }
4233
//
4234
// Including count equalizes the chances any candidate is "selected".
4235
// This is useful when we don't have the complete list of candidates to choose
4236
// from uniformly. In this case, we need to adjust the randomicity of the
4237
// selection, or else we will end up biasing the selection towards the latter
4238
// candidates.
4239
//
4240
// Quick back-envelope calculation shows that for the list of n candidates
4241
// the equal probability for the candidate to persist as "best" can be
4242
// achieved by replacing it with "next" k-th candidate with the probability
4243
// of 1/k. It can be easily shown that by the end of the run, the
4244
// probability for any candidate is converged to 1/n, thus giving the
4245
// uniform distribution among all the candidates.
4246
//
4247
// We don't care about the domain size as long as (RANDOMIZED_DOMAIN / count) is large.
4248
#define RANDOMIZED_DOMAIN_POW 29
4249
#define RANDOMIZED_DOMAIN (1 << RANDOMIZED_DOMAIN_POW)
4250
#define RANDOMIZED_DOMAIN_MASK ((1 << (RANDOMIZED_DOMAIN_POW + 1)) - 1)
4251
bool Compile::randomized_select(int count) {
4252
  assert(count > 0, "only positive");
4253
  return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);
4254
}
4255

4256
// Given a g1 pre-barrier call (Shenandoah only), locate the marking test
// in the control flow that follows the call, replace the node feeding that
// test (in(1) of the If's condition) with the constant TypeInt::CC_EQ, and
// remove the call's last required input.
void Compile::shenandoah_eliminate_g1_wb_pre(Node* call, PhaseIterGVN* igvn) {
  assert(UseShenandoahGC && call->is_g1_wb_pre_call(), "");

  // Follow the call's control projection out through two regions.
  Node* region = call->as_Call()->proj_out(TypeFunc::Control);
  region = region->unique_ctrl_out();
  assert(region->is_Region() && region->req() == 3, "where's the pre barrier control flow?");
  region = region->unique_ctrl_out();
  assert(region->is_Region() && region->req() == 3, "where's the pre barrier control flow?");

  // The If is the control input of whichever region input is an IfProj.
  Node* proj = region->in(1)->is_IfProj() ? region->in(1) : region->in(2);
  Node* test = proj->in(0);
  assert(test->is_If(), "expect test");

  if (!test->is_shenandoah_marking_if(igvn)) {
    // Not the marking test yet: look one region further out.
    region = region->unique_ctrl_out();
    assert(region->is_Region() && region->req() == 3, "where's the pre barrier control flow?");
    proj = region->in(1)->is_IfProj() ? region->in(1) : region->in(2);
    test = proj->in(0);
    assert(test->is_shenandoah_marking_if(igvn), "expect marking test");
  }

  Node* cmpx = test->in(1)->in(1);
  igvn->replace_node(cmpx, igvn->makecon(TypeInt::CC_EQ));
  igvn->rehash_node_delayed(call);
  call->del_req(call->req() - 1);
}
4276

4277
Product

Resources

Company