GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1
/*
2
* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*
23
*/
24
25
#include "precompiled.hpp"
26
#include "classfile/metadataOnStackMark.hpp"
27
#include "classfile/symbolTable.hpp"
28
#include "code/codeCache.hpp"
29
#include "gc_implementation/g1/concurrentMark.inline.hpp"
30
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
31
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
32
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
33
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
34
#include "gc_implementation/g1/g1Log.hpp"
35
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
36
#include "gc_implementation/g1/g1RemSet.hpp"
37
#include "gc_implementation/g1/heapRegion.inline.hpp"
38
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
39
#include "gc_implementation/g1/heapRegionRemSet.hpp"
40
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
41
#include "gc_implementation/shared/vmGCOperations.hpp"
42
#include "gc_implementation/shared/gcTimer.hpp"
43
#include "gc_implementation/shared/gcTrace.hpp"
44
#include "gc_implementation/shared/gcTraceTime.hpp"
45
#include "memory/allocation.hpp"
46
#include "memory/genOopClosures.inline.hpp"
47
#include "memory/referencePolicy.hpp"
48
#include "memory/resourceArea.hpp"
49
#include "oops/oop.inline.hpp"
50
#include "runtime/handles.inline.hpp"
51
#include "runtime/java.hpp"
52
#include "runtime/prefetch.inline.hpp"
53
#include "services/memTracker.hpp"
54
55
// Concurrent marking bit map wrapper
56
57
CMBitMapRO::CMBitMapRO(int shifter) :
58
_bm(),
59
_shifter(shifter) {
60
_bmStartWord = 0;
61
_bmWordSize = 0;
62
}
63
64
HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
65
const HeapWord* limit) const {
66
// First we must round addr *up* to a possible object boundary.
67
addr = (HeapWord*)align_size_up((intptr_t)addr,
68
HeapWordSize << _shifter);
69
size_t addrOffset = heapWordToOffset(addr);
70
if (limit == NULL) {
71
limit = _bmStartWord + _bmWordSize;
72
}
73
size_t limitOffset = heapWordToOffset(limit);
74
size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
75
HeapWord* nextAddr = offsetToHeapWord(nextOffset);
76
assert(nextAddr >= addr, "get_next_one postcondition");
77
assert(nextAddr == limit || isMarked(nextAddr),
78
"get_next_one postcondition");
79
return nextAddr;
80
}
81
82
HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
83
const HeapWord* limit) const {
84
size_t addrOffset = heapWordToOffset(addr);
85
if (limit == NULL) {
86
limit = _bmStartWord + _bmWordSize;
87
}
88
size_t limitOffset = heapWordToOffset(limit);
89
size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
90
HeapWord* nextAddr = offsetToHeapWord(nextOffset);
91
assert(nextAddr >= addr, "get_next_one postcondition");
92
assert(nextAddr == limit || !isMarked(nextAddr),
93
"get_next_one postcondition");
94
return nextAddr;
95
}
96
97
int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
98
assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
99
return (int) (diff >> _shifter);
100
}
101
102
#ifndef PRODUCT
103
bool CMBitMapRO::covers(MemRegion heap_rs) const {
104
// assert(_bm.map() == _virtual_space.low(), "map inconsistency");
105
assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
106
"size inconsistency");
107
return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
108
_bmWordSize == heap_rs.word_size();
109
}
110
#endif
111
112
void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
113
_bm.print_on_error(st, prefix);
114
}
115
116
size_t CMBitMap::compute_size(size_t heap_size) {
117
return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
118
}
119
120
size_t CMBitMap::mark_distance() {
121
return MinObjAlignmentInBytes * BitsPerByte;
122
}
123
124
void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
125
_bmStartWord = heap.start();
126
_bmWordSize = heap.word_size();
127
128
_bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
129
_bm.set_size(_bmWordSize >> _shifter);
130
131
storage->set_mapping_changed_listener(&_listener);
132
}
133
134
void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
135
if (zero_filled) {
136
return;
137
}
138
// We need to clear the bitmap on commit, removing any existing information.
139
MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
140
_bm->clearRange(mr);
141
}
142
143
// Closure used for clearing the given mark bitmap.
144
class ClearBitmapHRClosure : public HeapRegionClosure {
145
private:
146
ConcurrentMark* _cm;
147
CMBitMap* _bitmap;
148
bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
149
public:
150
ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
151
assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
152
}
153
154
virtual bool doHeapRegion(HeapRegion* r) {
155
size_t const chunk_size_in_words = M / HeapWordSize;
156
157
HeapWord* cur = r->bottom();
158
HeapWord* const end = r->end();
159
160
while (cur < end) {
161
MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
162
_bitmap->clearRange(mr);
163
164
cur += chunk_size_in_words;
165
166
// Abort iteration if after yielding the marking has been aborted.
167
if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
168
return true;
169
}
170
// Repeat the asserts from before the start of the closure. We will do them
171
// as asserts here to minimize their overhead on the product. However, we
172
// will have them as guarantees at the beginning / end of the bitmap
173
// clearing to get some checking in the product.
174
assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
175
assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
176
}
177
178
return false;
179
}
180
};
181
182
void CMBitMap::clearAll() {
183
ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
184
G1CollectedHeap::heap()->heap_region_iterate(&cl);
185
guarantee(cl.complete(), "Must have completed iteration.");
186
return;
187
}
188
189
void CMBitMap::markRange(MemRegion mr) {
190
mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
191
assert(!mr.is_empty(), "unexpected empty region");
192
assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
193
((HeapWord *) mr.end())),
194
"markRange memory region end is not card aligned");
195
// convert address range into offset range
196
_bm.at_put_range(heapWordToOffset(mr.start()),
197
heapWordToOffset(mr.end()), true);
198
}
199
200
void CMBitMap::clearRange(MemRegion mr) {
201
mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
202
assert(!mr.is_empty(), "unexpected empty region");
203
// convert address range into offset range
204
_bm.at_put_range(heapWordToOffset(mr.start()),
205
heapWordToOffset(mr.end()), false);
206
}
207
208
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
209
HeapWord* end_addr) {
210
HeapWord* start = getNextMarkedWordAddress(addr);
211
start = MIN2(start, end_addr);
212
HeapWord* end = getNextUnmarkedWordAddress(start);
213
end = MIN2(end, end_addr);
214
assert(start <= end, "Consistency check");
215
MemRegion mr(start, end);
216
if (!mr.is_empty()) {
217
clearRange(mr);
218
}
219
return mr;
220
}
221
222
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
223
_base(NULL), _cm(cm)
224
#ifdef ASSERT
225
, _drain_in_progress(false)
226
, _drain_in_progress_yields(false)
227
#endif
228
{}
229
230
bool CMMarkStack::allocate(size_t capacity) {
231
// allocate a stack of the requisite depth
232
ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
233
if (!rs.is_reserved()) {
234
warning("ConcurrentMark MarkStack allocation failure");
235
return false;
236
}
237
MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
238
if (!_virtual_space.initialize(rs, rs.size())) {
239
warning("ConcurrentMark MarkStack backing store failure");
240
// Release the virtual memory reserved for the marking stack
241
rs.release();
242
return false;
243
}
244
assert(_virtual_space.committed_size() == rs.size(),
245
"Didn't reserve backing store for all of ConcurrentMark stack?");
246
_base = (oop*) _virtual_space.low();
247
setEmpty();
248
_capacity = (jint) capacity;
249
_saved_index = -1;
250
_should_expand = false;
251
NOT_PRODUCT(_max_depth = 0);
252
return true;
253
}
254
255
void CMMarkStack::expand() {
256
// Called, during remark, if we've overflown the marking stack during marking.
257
assert(isEmpty(), "stack should been emptied while handling overflow");
258
assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
259
// Clear expansion flag
260
_should_expand = false;
261
if (_capacity == (jint) MarkStackSizeMax) {
262
if (PrintGCDetails && Verbose) {
263
gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
264
}
265
return;
266
}
267
// Double capacity if possible
268
jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
269
// Do not give up existing stack until we have managed to
270
// get the double capacity that we desired.
271
ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
272
sizeof(oop)));
273
if (rs.is_reserved()) {
274
// Release the backing store associated with old stack
275
_virtual_space.release();
276
// Reinitialize virtual space for new stack
277
if (!_virtual_space.initialize(rs, rs.size())) {
278
fatal("Not enough swap for expanded marking stack capacity");
279
}
280
_base = (oop*)(_virtual_space.low());
281
_index = 0;
282
_capacity = new_capacity;
283
} else {
284
if (PrintGCDetails && Verbose) {
285
// Failed to double capacity, continue;
286
gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
287
SIZE_FORMAT "K to " SIZE_FORMAT "K",
288
_capacity / K, new_capacity / K);
289
}
290
}
291
}
292
293
void CMMarkStack::set_should_expand() {
294
// If we're resetting the marking state because of an
295
// marking stack overflow, record that we should, if
296
// possible, expand the stack.
297
_should_expand = _cm->has_overflown();
298
}
299
300
CMMarkStack::~CMMarkStack() {
301
if (_base != NULL) {
302
_base = NULL;
303
_virtual_space.release();
304
}
305
}
306
307
void CMMarkStack::par_push(oop ptr) {
308
while (true) {
309
if (isFull()) {
310
_overflow = true;
311
return;
312
}
313
// Otherwise...
314
jint index = _index;
315
jint next_index = index+1;
316
jint res = Atomic::cmpxchg(next_index, &_index, index);
317
if (res == index) {
318
_base[index] = ptr;
319
// Note that we don't maintain this atomically. We could, but it
320
// doesn't seem necessary.
321
NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
322
return;
323
}
324
// Otherwise, we need to try again.
325
}
326
}
327
328
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
329
while (true) {
330
if (isFull()) {
331
_overflow = true;
332
return;
333
}
334
// Otherwise...
335
jint index = _index;
336
jint next_index = index + n;
337
if (next_index > _capacity) {
338
_overflow = true;
339
return;
340
}
341
jint res = Atomic::cmpxchg(next_index, &_index, index);
342
if (res == index) {
343
for (int i = 0; i < n; i++) {
344
int ind = index + i;
345
assert(ind < _capacity, "By overflow test above.");
346
_base[ind] = ptr_arr[i];
347
}
348
NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
349
return;
350
}
351
// Otherwise, we need to try again.
352
}
353
}
354
355
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
356
MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
357
jint start = _index;
358
jint next_index = start + n;
359
if (next_index > _capacity) {
360
_overflow = true;
361
return;
362
}
363
// Otherwise.
364
_index = next_index;
365
for (int i = 0; i < n; i++) {
366
int ind = start + i;
367
assert(ind < _capacity, "By overflow test above.");
368
_base[ind] = ptr_arr[i];
369
}
370
NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
371
}
372
373
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
374
MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
375
jint index = _index;
376
if (index == 0) {
377
*n = 0;
378
return false;
379
} else {
380
int k = MIN2(max, index);
381
jint new_ind = index - k;
382
for (int j = 0; j < k; j++) {
383
ptr_arr[j] = _base[new_ind + j];
384
}
385
_index = new_ind;
386
*n = k;
387
return true;
388
}
389
}
390
391
template<class OopClosureClass>
392
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
393
assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
394
|| SafepointSynchronize::is_at_safepoint(),
395
"Drain recursion must be yield-safe.");
396
bool res = true;
397
debug_only(_drain_in_progress = true);
398
debug_only(_drain_in_progress_yields = yield_after);
399
while (!isEmpty()) {
400
oop newOop = pop();
401
assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
402
assert(newOop->is_oop(), "Expected an oop");
403
assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
404
"only grey objects on this stack");
405
newOop->oop_iterate(cl);
406
if (yield_after && _cm->do_yield_check()) {
407
res = false;
408
break;
409
}
410
}
411
debug_only(_drain_in_progress = false);
412
return res;
413
}
414
415
void CMMarkStack::note_start_of_gc() {
416
assert(_saved_index == -1,
417
"note_start_of_gc()/end_of_gc() bracketed incorrectly");
418
_saved_index = _index;
419
}
420
421
void CMMarkStack::note_end_of_gc() {
422
// This is intentionally a guarantee, instead of an assert. If we
423
// accidentally add something to the mark stack during GC, it
424
// will be a correctness issue so it's better if we crash. We'll
425
// only check this once per GC anyway, so it won't be a performance
426
// issue in any way.
427
guarantee(_saved_index == _index,
428
err_msg("saved index: %d index: %d", _saved_index, _index));
429
_saved_index = -1;
430
}
431
432
void CMMarkStack::oops_do(OopClosure* f) {
433
assert(_saved_index == _index,
434
err_msg("saved index: %d index: %d", _saved_index, _index));
435
for (int i = 0; i < _index; i += 1) {
436
f->do_oop(&_base[i]);
437
}
438
}
439
440
CMRootRegions::CMRootRegions() :
441
_young_list(NULL), _cm(NULL), _scan_in_progress(false),
442
_should_abort(false), _next_survivor(NULL) { }
443
444
void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
445
_young_list = g1h->young_list();
446
_cm = cm;
447
}
448
449
void CMRootRegions::prepare_for_scan() {
450
assert(!scan_in_progress(), "pre-condition");
451
452
// Currently, only survivors can be root regions.
453
assert(_next_survivor == NULL, "pre-condition");
454
_next_survivor = _young_list->first_survivor_region();
455
_scan_in_progress = (_next_survivor != NULL);
456
_should_abort = false;
457
}
458
459
HeapRegion* CMRootRegions::claim_next() {
460
if (_should_abort) {
461
// If someone has set the should_abort flag, we return NULL to
462
// force the caller to bail out of their loop.
463
return NULL;
464
}
465
466
// Currently, only survivors can be root regions.
467
HeapRegion* res = _next_survivor;
468
if (res != NULL) {
469
MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
470
// Read it again in case it changed while we were waiting for the lock.
471
res = _next_survivor;
472
if (res != NULL) {
473
if (res == _young_list->last_survivor_region()) {
474
// We just claimed the last survivor so store NULL to indicate
475
// that we're done.
476
_next_survivor = NULL;
477
} else {
478
_next_survivor = res->get_next_young_region();
479
}
480
} else {
481
// Someone else claimed the last survivor while we were trying
482
// to take the lock so nothing else to do.
483
}
484
}
485
assert(res == NULL || res->is_survivor(), "post-condition");
486
487
return res;
488
}
489
490
void CMRootRegions::scan_finished() {
491
assert(scan_in_progress(), "pre-condition");
492
493
// Currently, only survivors can be root regions.
494
if (!_should_abort) {
495
assert(_next_survivor == NULL, "we should have claimed all survivors");
496
}
497
_next_survivor = NULL;
498
499
{
500
MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
501
_scan_in_progress = false;
502
RootRegionScan_lock->notify_all();
503
}
504
}
505
506
bool CMRootRegions::wait_until_scan_finished() {
507
if (!scan_in_progress()) return false;
508
509
{
510
MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
511
while (scan_in_progress()) {
512
RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
513
}
514
}
515
return true;
516
}
517
518
#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
519
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
520
#endif // _MSC_VER
521
522
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
523
return MAX2((n_par_threads + 2) / 4, 1U);
524
}
525
526
ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
527
_g1h(g1h),
528
_markBitMap1(),
529
_markBitMap2(),
530
_parallel_marking_threads(0),
531
_max_parallel_marking_threads(0),
532
_sleep_factor(0.0),
533
_marking_task_overhead(1.0),
534
_cleanup_sleep_factor(0.0),
535
_cleanup_task_overhead(1.0),
536
_cleanup_list("Cleanup List"),
537
_region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
538
_card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
539
CardTableModRefBS::card_shift,
540
false /* in_resource_area*/),
541
542
_prevMarkBitMap(&_markBitMap1),
543
_nextMarkBitMap(&_markBitMap2),
544
545
_markStack(this),
546
// _finger set in set_non_marking_state
547
548
_max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
549
// _active_tasks set in set_non_marking_state
550
// _tasks set inside the constructor
551
_task_queues(new CMTaskQueueSet((int) _max_worker_id)),
552
_terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
553
554
_has_overflown(false),
555
_concurrent(false),
556
_has_aborted(false),
557
_aborted_gc_id(GCId::undefined()),
558
_restart_for_overflow(false),
559
_concurrent_marking_in_progress(false),
560
561
// _verbose_level set below
562
563
_init_times(),
564
_remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
565
_cleanup_times(),
566
_total_counting_time(0.0),
567
_total_rs_scrub_time(0.0),
568
569
_parallel_workers(NULL),
570
571
_count_card_bitmaps(NULL),
572
_count_marked_bytes(NULL),
573
_completed_initialization(false) {
574
CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
575
if (verbose_level < no_verbose) {
576
verbose_level = no_verbose;
577
}
578
if (verbose_level > high_verbose) {
579
verbose_level = high_verbose;
580
}
581
_verbose_level = verbose_level;
582
583
if (verbose_low()) {
584
gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT", "
585
"heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
586
}
587
588
_markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
589
_markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
590
591
// Create & start a ConcurrentMark thread.
592
_cmThread = new ConcurrentMarkThread(this);
593
assert(cmThread() != NULL, "CM Thread should have been created");
594
assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
595
if (_cmThread->osthread() == NULL) {
596
vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
597
}
598
599
assert(CGC_lock != NULL, "Where's the CGC_lock?");
600
assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
601
assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
602
603
SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
604
satb_qs.set_buffer_size(G1SATBBufferSize);
605
606
_root_regions.init(_g1h, this);
607
608
if (ConcGCThreads > ParallelGCThreads) {
609
warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
610
"than ParallelGCThreads (" UINTX_FORMAT ").",
611
ConcGCThreads, ParallelGCThreads);
612
return;
613
}
614
if (ParallelGCThreads == 0) {
615
// if we are not running with any parallel GC threads we will not
616
// spawn any marking threads either
617
_parallel_marking_threads = 0;
618
_max_parallel_marking_threads = 0;
619
_sleep_factor = 0.0;
620
_marking_task_overhead = 1.0;
621
} else {
622
if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
623
// Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
624
// if both are set
625
_sleep_factor = 0.0;
626
_marking_task_overhead = 1.0;
627
} else if (G1MarkingOverheadPercent > 0) {
628
// We will calculate the number of parallel marking threads based
629
// on a target overhead with respect to the soft real-time goal
630
double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
631
double overall_cm_overhead =
632
(double) MaxGCPauseMillis * marking_overhead /
633
(double) GCPauseIntervalMillis;
634
double cpu_ratio = 1.0 / os::initial_active_processor_count();
635
double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
636
double marking_task_overhead =
637
overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
638
double sleep_factor =
639
(1.0 - marking_task_overhead) / marking_task_overhead;
640
641
FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
642
_sleep_factor = sleep_factor;
643
_marking_task_overhead = marking_task_overhead;
644
} else {
645
// Calculate the number of parallel marking threads by scaling
646
// the number of parallel GC threads.
647
uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
648
FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
649
_sleep_factor = 0.0;
650
_marking_task_overhead = 1.0;
651
}
652
653
assert(ConcGCThreads > 0, "Should have been set");
654
_parallel_marking_threads = (uint) ConcGCThreads;
655
_max_parallel_marking_threads = _parallel_marking_threads;
656
657
if (parallel_marking_threads() > 1) {
658
_cleanup_task_overhead = 1.0;
659
} else {
660
_cleanup_task_overhead = marking_task_overhead();
661
}
662
_cleanup_sleep_factor =
663
(1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
664
665
#if 0
666
gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
667
gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
668
gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
669
gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
670
gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
671
#endif
672
673
guarantee(parallel_marking_threads() > 0, "peace of mind");
674
_parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
675
_max_parallel_marking_threads, false, true);
676
if (_parallel_workers == NULL) {
677
vm_exit_during_initialization("Failed necessary allocation.");
678
} else {
679
_parallel_workers->initialize_workers();
680
}
681
}
682
683
if (FLAG_IS_DEFAULT(MarkStackSize)) {
684
uintx mark_stack_size =
685
MIN2(MarkStackSizeMax,
686
MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
687
// Verify that the calculated value for MarkStackSize is in range.
688
// It would be nice to use the private utility routine from Arguments.
689
if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
690
warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
691
"must be between " UINTX_FORMAT " and " UINTX_FORMAT,
692
mark_stack_size, (uintx) 1, MarkStackSizeMax);
693
return;
694
}
695
FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
696
} else {
697
// Verify MarkStackSize is in range.
698
if (FLAG_IS_CMDLINE(MarkStackSize)) {
699
if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
700
if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
701
warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
702
"must be between " UINTX_FORMAT " and " UINTX_FORMAT,
703
MarkStackSize, (uintx) 1, MarkStackSizeMax);
704
return;
705
}
706
} else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
707
if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
708
warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
709
" or for MarkStackSizeMax (" UINTX_FORMAT ")",
710
MarkStackSize, MarkStackSizeMax);
711
return;
712
}
713
}
714
}
715
}
716
717
if (!_markStack.allocate(MarkStackSize)) {
718
warning("Failed to allocate CM marking stack");
719
return;
720
}
721
722
_tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
723
_accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
724
725
_count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
726
_count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
727
728
BitMap::idx_t card_bm_size = _card_bm.size();
729
730
// so that the assertion in MarkingTaskQueue::task_queue doesn't fail
731
_active_tasks = _max_worker_id;
732
733
size_t max_regions = (size_t) _g1h->max_regions();
734
for (uint i = 0; i < _max_worker_id; ++i) {
735
CMTaskQueue* task_queue = new CMTaskQueue();
736
task_queue->initialize();
737
_task_queues->register_queue(i, task_queue);
738
739
_count_card_bitmaps[i] = BitMap(card_bm_size, false);
740
_count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
741
742
_tasks[i] = new CMTask(i, this,
743
_count_marked_bytes[i],
744
&_count_card_bitmaps[i],
745
task_queue, _task_queues);
746
747
_accum_task_vtime[i] = 0.0;
748
}
749
750
// Calculate the card number for the bottom of the heap. Used
751
// in biasing indexes into the accounting card bitmaps.
752
_heap_bottom_card_num =
753
intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
754
CardTableModRefBS::card_shift);
755
756
// Clear all the liveness counting data
757
clear_all_count_data();
758
759
// so that the call below can read a sensible value
760
_heap_start = g1h->reserved_region().start();
761
set_non_marking_state();
762
_completed_initialization = true;
763
}
764
765
void ConcurrentMark::reset() {
766
// Starting values for these two. This should be called in a STW
767
// phase.
768
MemRegion reserved = _g1h->g1_reserved();
769
_heap_start = reserved.start();
770
_heap_end = reserved.end();
771
772
// Separated the asserts so that we know which one fires.
773
assert(_heap_start != NULL, "heap bounds should look ok");
774
assert(_heap_end != NULL, "heap bounds should look ok");
775
assert(_heap_start < _heap_end, "heap bounds should look ok");
776
777
// Reset all the marking data structures and any necessary flags
778
reset_marking_state();
779
780
if (verbose_low()) {
781
gclog_or_tty->print_cr("[global] resetting");
782
}
783
784
// We do reset all of them, since different phases will use
785
// different number of active threads. So, it's easiest to have all
786
// of them ready.
787
for (uint i = 0; i < _max_worker_id; ++i) {
788
_tasks[i]->reset(_nextMarkBitMap);
789
}
790
791
// we need this to make sure that the flag is on during the evac
792
// pause with initial mark piggy-backed
793
set_concurrent_marking_in_progress();
794
}
795
796
797
void ConcurrentMark::reset_marking_state(bool clear_overflow) {
798
_markStack.set_should_expand();
799
_markStack.setEmpty(); // Also clears the _markStack overflow flag
800
if (clear_overflow) {
801
clear_has_overflown();
802
} else {
803
assert(has_overflown(), "pre-condition");
804
}
805
_finger = _heap_start;
806
807
for (uint i = 0; i < _max_worker_id; ++i) {
808
CMTaskQueue* queue = _task_queues->queue(i);
809
queue->set_empty();
810
}
811
}
812
813
void ConcurrentMark::set_concurrency(uint active_tasks) {
814
assert(active_tasks <= _max_worker_id, "we should not have more");
815
816
_active_tasks = active_tasks;
817
// Need to update the three data structures below according to the
818
// number of active threads for this phase.
819
_terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
820
_first_overflow_barrier_sync.set_n_workers((int) active_tasks);
821
_second_overflow_barrier_sync.set_n_workers((int) active_tasks);
822
}
823
824
void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
825
set_concurrency(active_tasks);
826
827
_concurrent = concurrent;
828
// We propagate this to all tasks, not just the active ones.
829
for (uint i = 0; i < _max_worker_id; ++i)
830
_tasks[i]->set_concurrent(concurrent);
831
832
if (concurrent) {
833
set_concurrent_marking_in_progress();
834
} else {
835
// We currently assume that the concurrent flag has been set to
836
// false before we start remark. At this point we should also be
837
// in a STW phase.
838
assert(!concurrent_marking_in_progress(), "invariant");
839
assert(out_of_regions(),
840
err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
841
p2i(_finger), p2i(_heap_end)));
842
}
843
}
844
845
void ConcurrentMark::set_non_marking_state() {
846
// We set the global marking state to some default values when we're
847
// not doing marking.
848
reset_marking_state();
849
_active_tasks = 0;
850
clear_concurrent_marking_in_progress();
851
}
852
853
ConcurrentMark::~ConcurrentMark() {
854
// The ConcurrentMark instance is never freed.
855
ShouldNotReachHere();
856
}
857
858
void ConcurrentMark::clearNextBitmap() {
859
G1CollectedHeap* g1h = G1CollectedHeap::heap();
860
861
// Make sure that the concurrent mark thread still appears to be in
862
// the current cycle.
863
guarantee(cmThread()->during_cycle(), "invariant");
864
865
// We are finishing up the current cycle by clearing the next
866
// marking bitmap and getting it ready for the next cycle. During
867
// this time no other cycle can start. So, let's make sure that this
868
// is the case.
869
guarantee(!g1h->mark_in_progress(), "invariant");
870
871
ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
872
g1h->heap_region_iterate(&cl);
873
874
// Clear the liveness counting data. If the marking has been aborted, the abort()
875
// call already did that.
876
if (cl.complete()) {
877
clear_all_count_data();
878
}
879
880
// Repeat the asserts from above.
881
guarantee(cmThread()->during_cycle(), "invariant");
882
guarantee(!g1h->mark_in_progress(), "invariant");
883
}
884
885
class CheckBitmapClearHRClosure : public HeapRegionClosure {
886
CMBitMap* _bitmap;
887
bool _error;
888
public:
889
CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
890
}
891
892
virtual bool doHeapRegion(HeapRegion* r) {
893
// This closure can be called concurrently to the mutator, so we must make sure
894
// that the result of the getNextMarkedWordAddress() call is compared to the
895
// value passed to it as limit to detect any found bits.
896
// We can use the region's orig_end() for the limit and the comparison value
897
// as it always contains the "real" end of the region that never changes and
898
// has no side effects.
899
// Due to the latter, there can also be no problem with the compiler generating
900
// reloads of the orig_end() call.
901
HeapWord* end = r->orig_end();
902
return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
903
}
904
};
905
906
bool ConcurrentMark::nextMarkBitmapIsClear() {
907
CheckBitmapClearHRClosure cl(_nextMarkBitMap);
908
_g1h->heap_region_iterate(&cl);
909
return cl.complete();
910
}
911
912
class NoteStartOfMarkHRClosure: public HeapRegionClosure {
913
public:
914
bool doHeapRegion(HeapRegion* r) {
915
if (!r->continuesHumongous()) {
916
r->note_start_of_marking();
917
}
918
return false;
919
}
920
};
921
922
void ConcurrentMark::checkpointRootsInitialPre() {
923
G1CollectedHeap* g1h = G1CollectedHeap::heap();
924
G1CollectorPolicy* g1p = g1h->g1_policy();
925
926
_has_aborted = false;
927
928
#ifndef PRODUCT
929
if (G1PrintReachableAtInitialMark) {
930
print_reachable("at-cycle-start",
931
VerifyOption_G1UsePrevMarking, true /* all */);
932
}
933
#endif
934
935
// Initialise marking structures. This has to be done in a STW phase.
936
reset();
937
938
// For each region note start of marking.
939
NoteStartOfMarkHRClosure startcl;
940
g1h->heap_region_iterate(&startcl);
941
}
942
943
944
void ConcurrentMark::checkpointRootsInitialPost() {
945
G1CollectedHeap* g1h = G1CollectedHeap::heap();
946
947
// If we force an overflow during remark, the remark operation will
948
// actually abort and we'll restart concurrent marking. If we always
949
// force an overflow during remark, we'll never actually complete the
950
// marking phase. So, we initialize this here, at the start of the
951
// cycle, so that the remaining overflow number will decrease at
952
// every remark and we'll eventually not need to cause one.
953
force_overflow_stw()->init();
954
955
// Start Concurrent Marking weak-reference discovery.
956
ReferenceProcessor* rp = g1h->ref_processor_cm();
957
// enable ("weak") refs discovery
958
rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
959
rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
960
961
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
962
// This is the start of the marking cycle; we expect all
963
// threads to have SATB queues with active set to false.
964
satb_mq_set.set_active_all_threads(true, /* new active value */
965
false /* expected_active */);
966
967
_root_regions.prepare_for_scan();
968
969
// update_g1_committed() will be called at the end of an evac pause
970
// when marking is on. So, it's also called at the end of the
971
// initial-mark pause to update the heap end, if the heap expands
972
// during it. No need to call it here.
973
}
974
975
/*
976
* Notice that in the next two methods, we actually leave the STS
977
* during the barrier sync and join it immediately afterwards. If we
978
* do not do this, the following deadlock can occur: one thread could
979
* be in the barrier sync code, waiting for the other thread to also
980
* sync up, whereas another one could be trying to yield, while also
981
* waiting for the other threads to sync up too.
982
*
983
* Note, however, that this code is also used during remark and in
984
* this case we should not attempt to leave / enter the STS, otherwise
985
* we'll either hit an assert (debug / fastdebug) or deadlock
986
* (product). So we should only leave / enter the STS if we are
987
* operating concurrently.
988
*
989
* Because the thread that does the sync barrier has left the STS, it
990
* is possible to be suspended for a Full GC or an evacuation pause
991
* could occur. This is actually safe, since the entering the sync
992
* barrier is one of the last things do_marking_step() does, and it
993
* doesn't manipulate any data structures afterwards.
994
*/
995
996
void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
997
if (verbose_low()) {
998
gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
999
}
1000
1001
if (concurrent()) {
1002
SuspendibleThreadSet::leave();
1003
}
1004
1005
bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1006
1007
if (concurrent()) {
1008
SuspendibleThreadSet::join();
1009
}
1010
// at this point everyone should have synced up and not be doing any
1011
// more work
1012
1013
if (verbose_low()) {
1014
if (barrier_aborted) {
1015
gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1016
} else {
1017
gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1018
}
1019
}
1020
1021
if (barrier_aborted) {
1022
// If the barrier aborted we ignore the overflow condition and
1023
// just abort the whole marking phase as quickly as possible.
1024
return;
1025
}
1026
1027
// If we're executing the concurrent phase of marking, reset the marking
1028
// state; otherwise the marking state is reset after reference processing,
1029
// during the remark pause.
1030
// If we reset here as a result of an overflow during the remark we will
1031
// see assertion failures from any subsequent set_concurrency_and_phase()
1032
// calls.
1033
if (concurrent()) {
1034
// let the task associated with worker 0 do this
1035
if (worker_id == 0) {
1036
// task 0 is responsible for clearing the global data structures
1037
// We should be here because of an overflow. During STW we should
1038
// not clear the overflow flag since we rely on it being true when
1039
// we exit this method to abort the pause and restart concurrent
1040
// marking.
1041
reset_marking_state(true /* clear_overflow */);
1042
force_overflow()->update();
1043
1044
if (G1Log::fine()) {
1045
gclog_or_tty->gclog_stamp(concurrent_gc_id());
1046
gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1047
}
1048
}
1049
}
1050
1051
// after this, each task should reset its own data structures and
1052
// then go into the second barrier
1053
}
1054
1055
void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1056
if (verbose_low()) {
1057
gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1058
}
1059
1060
if (concurrent()) {
1061
SuspendibleThreadSet::leave();
1062
}
1063
1064
bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1065
1066
if (concurrent()) {
1067
SuspendibleThreadSet::join();
1068
}
1069
// at this point everything should be re-initialized and ready to go
1070
1071
if (verbose_low()) {
1072
if (barrier_aborted) {
1073
gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1074
} else {
1075
gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1076
}
1077
}
1078
}
1079
1080
#ifndef PRODUCT
1081
void ForceOverflowSettings::init() {
1082
_num_remaining = G1ConcMarkForceOverflow;
1083
_force = false;
1084
update();
1085
}
1086
1087
void ForceOverflowSettings::update() {
1088
if (_num_remaining > 0) {
1089
_num_remaining -= 1;
1090
_force = true;
1091
} else {
1092
_force = false;
1093
}
1094
}
1095
1096
bool ForceOverflowSettings::should_force() {
1097
if (_force) {
1098
_force = false;
1099
return true;
1100
} else {
1101
return false;
1102
}
1103
}
1104
#endif // !PRODUCT
1105
1106
class CMConcurrentMarkingTask: public AbstractGangTask {
1107
private:
1108
ConcurrentMark* _cm;
1109
ConcurrentMarkThread* _cmt;
1110
1111
public:
1112
void work(uint worker_id) {
1113
assert(Thread::current()->is_ConcurrentGC_thread(),
1114
"this should only be done by a conc GC thread");
1115
ResourceMark rm;
1116
1117
double start_vtime = os::elapsedVTime();
1118
1119
SuspendibleThreadSet::join();
1120
1121
assert(worker_id < _cm->active_tasks(), "invariant");
1122
CMTask* the_task = _cm->task(worker_id);
1123
the_task->record_start_time();
1124
if (!_cm->has_aborted()) {
1125
do {
1126
double start_vtime_sec = os::elapsedVTime();
1127
double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1128
1129
the_task->do_marking_step(mark_step_duration_ms,
1130
true /* do_termination */,
1131
false /* is_serial*/);
1132
1133
double end_vtime_sec = os::elapsedVTime();
1134
double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1135
_cm->clear_has_overflown();
1136
1137
_cm->do_yield_check(worker_id);
1138
1139
jlong sleep_time_ms;
1140
if (!_cm->has_aborted() && the_task->has_aborted()) {
1141
sleep_time_ms =
1142
(jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1143
SuspendibleThreadSet::leave();
1144
os::sleep(Thread::current(), sleep_time_ms, false);
1145
SuspendibleThreadSet::join();
1146
}
1147
} while (!_cm->has_aborted() && the_task->has_aborted());
1148
}
1149
the_task->record_end_time();
1150
guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1151
1152
SuspendibleThreadSet::leave();
1153
1154
double end_vtime = os::elapsedVTime();
1155
_cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1156
}
1157
1158
CMConcurrentMarkingTask(ConcurrentMark* cm,
1159
ConcurrentMarkThread* cmt) :
1160
AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1161
1162
~CMConcurrentMarkingTask() { }
1163
};
1164
1165
// Calculates the number of active workers for a concurrent
1166
// phase.
1167
uint ConcurrentMark::calc_parallel_marking_threads() {
1168
if (G1CollectedHeap::use_parallel_gc_threads()) {
1169
uint n_conc_workers = 0;
1170
if (!UseDynamicNumberOfGCThreads ||
1171
(!FLAG_IS_DEFAULT(ConcGCThreads) &&
1172
!ForceDynamicNumberOfGCThreads)) {
1173
n_conc_workers = max_parallel_marking_threads();
1174
} else {
1175
n_conc_workers =
1176
AdaptiveSizePolicy::calc_default_active_workers(
1177
max_parallel_marking_threads(),
1178
1, /* Minimum workers */
1179
parallel_marking_threads(),
1180
Threads::number_of_non_daemon_threads());
1181
// Don't scale down "n_conc_workers" by scale_parallel_threads() because
1182
// that scaling has already gone into "_max_parallel_marking_threads".
1183
}
1184
assert(n_conc_workers > 0, "Always need at least 1");
1185
return n_conc_workers;
1186
}
1187
// If we are not running with any parallel GC threads we will not
1188
// have spawned any marking threads either. Hence the number of
1189
// concurrent workers should be 0.
1190
return 0;
1191
}
1192
1193
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1194
// Currently, only survivors can be root regions.
1195
assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1196
G1RootRegionScanClosure cl(_g1h, this, worker_id);
1197
1198
const uintx interval = PrefetchScanIntervalInBytes;
1199
HeapWord* curr = hr->bottom();
1200
const HeapWord* end = hr->top();
1201
while (curr < end) {
1202
Prefetch::read(curr, interval);
1203
oop obj = oop(curr);
1204
int size = obj->oop_iterate(&cl);
1205
assert(size == obj->size(), "sanity");
1206
curr += size;
1207
}
1208
}
1209
1210
class CMRootRegionScanTask : public AbstractGangTask {
1211
private:
1212
ConcurrentMark* _cm;
1213
1214
public:
1215
CMRootRegionScanTask(ConcurrentMark* cm) :
1216
AbstractGangTask("Root Region Scan"), _cm(cm) { }
1217
1218
void work(uint worker_id) {
1219
assert(Thread::current()->is_ConcurrentGC_thread(),
1220
"this should only be done by a conc GC thread");
1221
1222
CMRootRegions* root_regions = _cm->root_regions();
1223
HeapRegion* hr = root_regions->claim_next();
1224
while (hr != NULL) {
1225
_cm->scanRootRegion(hr, worker_id);
1226
hr = root_regions->claim_next();
1227
}
1228
}
1229
};
1230
1231
void ConcurrentMark::scanRootRegions() {
1232
// Start of concurrent marking.
1233
ClassLoaderDataGraph::clear_claimed_marks();
1234
1235
// scan_in_progress() will have been set to true only if there was
1236
// at least one root region to scan. So, if it's false, we
1237
// should not attempt to do any further work.
1238
if (root_regions()->scan_in_progress()) {
1239
_parallel_marking_threads = calc_parallel_marking_threads();
1240
assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1241
"Maximum number of marking threads exceeded");
1242
uint active_workers = MAX2(1U, parallel_marking_threads());
1243
1244
CMRootRegionScanTask task(this);
1245
if (use_parallel_marking_threads()) {
1246
_parallel_workers->set_active_workers((int) active_workers);
1247
_parallel_workers->run_task(&task);
1248
} else {
1249
task.work(0);
1250
}
1251
1252
// It's possible that has_aborted() is true here without actually
1253
// aborting the survivor scan earlier. This is OK as it's
1254
// mainly used for sanity checking.
1255
root_regions()->scan_finished();
1256
}
1257
}
1258
1259
void ConcurrentMark::markFromRoots() {
1260
// we might be tempted to assert that:
1261
// assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1262
// "inconsistent argument?");
1263
// However that wouldn't be right, because it's possible that
1264
// a safepoint is indeed in progress as a younger generation
1265
// stop-the-world GC happens even as we mark in this generation.
1266
1267
_restart_for_overflow = false;
1268
force_overflow_conc()->init();
1269
1270
// _g1h has _n_par_threads
1271
_parallel_marking_threads = calc_parallel_marking_threads();
1272
assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1273
"Maximum number of marking threads exceeded");
1274
1275
uint active_workers = MAX2(1U, parallel_marking_threads());
1276
1277
// Parallel task terminator is set in "set_concurrency_and_phase()"
1278
set_concurrency_and_phase(active_workers, true /* concurrent */);
1279
1280
CMConcurrentMarkingTask markingTask(this, cmThread());
1281
if (use_parallel_marking_threads()) {
1282
_parallel_workers->set_active_workers((int)active_workers);
1283
// Don't set _n_par_threads because it affects MT in process_roots()
1284
// and the decisions on that MT processing are made elsewhere.
1285
assert(_parallel_workers->active_workers() > 0, "Should have been set");
1286
_parallel_workers->run_task(&markingTask);
1287
} else {
1288
markingTask.work(0);
1289
}
1290
print_stats();
1291
}
1292
1293
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1294
// world is stopped at this checkpoint
1295
assert(SafepointSynchronize::is_at_safepoint(),
1296
"world should be stopped");
1297
1298
G1CollectedHeap* g1h = G1CollectedHeap::heap();
1299
1300
// If a full collection has happened, we shouldn't do this.
1301
if (has_aborted()) {
1302
g1h->set_marking_complete(); // So bitmap clearing isn't confused
1303
return;
1304
}
1305
1306
SvcGCMarker sgcm(SvcGCMarker::OTHER);
1307
1308
if (VerifyDuringGC) {
1309
HandleMark hm; // handle scope
1310
Universe::heap()->prepare_for_verify();
1311
Universe::verify(VerifyOption_G1UsePrevMarking,
1312
" VerifyDuringGC:(before)");
1313
}
1314
g1h->check_bitmaps("Remark Start");
1315
1316
G1CollectorPolicy* g1p = g1h->g1_policy();
1317
g1p->record_concurrent_mark_remark_start();
1318
1319
double start = os::elapsedTime();
1320
1321
checkpointRootsFinalWork();
1322
1323
double mark_work_end = os::elapsedTime();
1324
1325
weakRefsWork(clear_all_soft_refs);
1326
1327
if (has_overflown()) {
1328
// Oops. We overflowed. Restart concurrent marking.
1329
_restart_for_overflow = true;
1330
if (G1TraceMarkStackOverflow) {
1331
gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1332
}
1333
1334
// Verify the heap w.r.t. the previous marking bitmap.
1335
if (VerifyDuringGC) {
1336
HandleMark hm; // handle scope
1337
Universe::heap()->prepare_for_verify();
1338
Universe::verify(VerifyOption_G1UsePrevMarking,
1339
" VerifyDuringGC:(overflow)");
1340
}
1341
1342
// Clear the marking state because we will be restarting
1343
// marking due to overflowing the global mark stack.
1344
reset_marking_state();
1345
} else {
1346
// Aggregate the per-task counting data that we have accumulated
1347
// while marking.
1348
aggregate_count_data();
1349
1350
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1351
// We're done with marking.
1352
// This is the end of the marking cycle; we expect all
1353
// threads to have SATB queues with active set to true.
1354
satb_mq_set.set_active_all_threads(false, /* new active value */
1355
true /* expected_active */);
1356
1357
if (VerifyDuringGC) {
1358
HandleMark hm; // handle scope
1359
Universe::heap()->prepare_for_verify();
1360
Universe::verify(VerifyOption_G1UseNextMarking,
1361
" VerifyDuringGC:(after)");
1362
}
1363
g1h->check_bitmaps("Remark End");
1364
assert(!restart_for_overflow(), "sanity");
1365
// Completely reset the marking state since marking completed
1366
set_non_marking_state();
1367
}
1368
1369
// Expand the marking stack, if we have to and if we can.
1370
if (_markStack.should_expand()) {
1371
_markStack.expand();
1372
}
1373
1374
// Statistics
1375
double now = os::elapsedTime();
1376
_remark_mark_times.add((mark_work_end - start) * 1000.0);
1377
_remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1378
_remark_times.add((now - start) * 1000.0);
1379
1380
g1p->record_concurrent_mark_remark_end();
1381
1382
G1CMIsAliveClosure is_alive(g1h);
1383
g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1384
}
1385
1386
// Base class of the closures that finalize and verify the
1387
// liveness counting data.
1388
class CMCountDataClosureBase: public HeapRegionClosure {
1389
protected:
1390
G1CollectedHeap* _g1h;
1391
ConcurrentMark* _cm;
1392
CardTableModRefBS* _ct_bs;
1393
1394
BitMap* _region_bm;
1395
BitMap* _card_bm;
1396
1397
// Takes a region that's not empty (i.e., it has at least one
1398
// live object in it) and sets its corresponding bit on the region
1399
// bitmap to 1. If the region is "starts humongous" it will also set
1400
// to 1 the bits on the region bitmap that correspond to its
1401
// associated "continues humongous" regions.
1402
void set_bit_for_region(HeapRegion* hr) {
1403
assert(!hr->continuesHumongous(), "should have filtered those out");
1404
1405
BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1406
if (!hr->startsHumongous()) {
1407
// Normal (non-humongous) case: just set the bit.
1408
_region_bm->par_at_put(index, true);
1409
} else {
1410
// Starts humongous case: calculate how many regions are part of
1411
// this humongous region and then set the bit range.
1412
BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1413
_region_bm->par_at_put_range(index, end_index, true);
1414
}
1415
}
1416
1417
public:
1418
CMCountDataClosureBase(G1CollectedHeap* g1h,
1419
BitMap* region_bm, BitMap* card_bm):
1420
_g1h(g1h), _cm(g1h->concurrent_mark()),
1421
_ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1422
_region_bm(region_bm), _card_bm(card_bm) { }
1423
};
1424
1425
// Closure that calculates the # live objects per region. Used
1426
// for verification purposes during the cleanup pause.
1427
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1428
CMBitMapRO* _bm;
1429
size_t _region_marked_bytes;
1430
1431
public:
1432
CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1433
BitMap* region_bm, BitMap* card_bm) :
1434
CMCountDataClosureBase(g1h, region_bm, card_bm),
1435
_bm(bm), _region_marked_bytes(0) { }
1436
1437
bool doHeapRegion(HeapRegion* hr) {
1438
1439
if (hr->continuesHumongous()) {
1440
// We will ignore these here and process them when their
1441
// associated "starts humongous" region is processed (see
1442
// set_bit_for_heap_region()). Note that we cannot rely on their
1443
// associated "starts humongous" region to have their bit set to
1444
// 1 since, due to the region chunking in the parallel region
1445
// iteration, a "continues humongous" region might be visited
1446
// before its associated "starts humongous".
1447
return false;
1448
}
1449
1450
HeapWord* ntams = hr->next_top_at_mark_start();
1451
HeapWord* start = hr->bottom();
1452
1453
assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1454
err_msg("Preconditions not met - "
1455
"start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1456
p2i(start), p2i(ntams), p2i(hr->end())));
1457
1458
// Find the first marked object at or after "start".
1459
start = _bm->getNextMarkedWordAddress(start, ntams);
1460
1461
size_t marked_bytes = 0;
1462
1463
while (start < ntams) {
1464
oop obj = oop(start);
1465
int obj_sz = obj->size();
1466
HeapWord* obj_end = start + obj_sz;
1467
1468
BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1469
BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1470
1471
// Note: if we're looking at the last region in the heap - obj_end
1472
// could actually be just beyond the end of the heap; end_idx
1473
// will then correspond to a (non-existent) card that is also
1474
// just beyond the heap.
1475
if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1476
// end of object is not card aligned - increment to cover
1477
// all the cards spanned by the object
1478
end_idx += 1;
1479
}
1480
1481
// Set the bits in the card BM for the cards spanned by this object.
1482
_cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1483
1484
// Add the size of this object to the number of marked bytes.
1485
marked_bytes += (size_t)obj_sz * HeapWordSize;
1486
1487
// Find the next marked object after this one.
1488
start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1489
}
1490
1491
// Mark the allocated-since-marking portion...
1492
HeapWord* top = hr->top();
1493
if (ntams < top) {
1494
BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1495
BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1496
1497
// Note: if we're looking at the last region in the heap - top
1498
// could actually be just beyond the end of the heap; end_idx
1499
// will then correspond to a (non-existent) card that is also
1500
// just beyond the heap.
1501
if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1502
// end of object is not card aligned - increment to cover
1503
// all the cards spanned by the object
1504
end_idx += 1;
1505
}
1506
_cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1507
1508
// This definitely means the region has live objects.
1509
set_bit_for_region(hr);
1510
}
1511
1512
// Update the live region bitmap.
1513
if (marked_bytes > 0) {
1514
set_bit_for_region(hr);
1515
}
1516
1517
// Set the marked bytes for the current region so that
1518
// it can be queried by a calling verification routine
1519
_region_marked_bytes = marked_bytes;
1520
1521
return false;
1522
}
1523
1524
size_t region_marked_bytes() const { return _region_marked_bytes; }
1525
};
1526
1527
// Heap region closure used for verifying the counting data
1528
// that was accumulated concurrently and aggregated during
1529
// the remark pause. This closure is applied to the heap
1530
// regions during the STW cleanup pause.
1531
1532
class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1533
G1CollectedHeap* _g1h;
1534
ConcurrentMark* _cm;
1535
CalcLiveObjectsClosure _calc_cl;
1536
BitMap* _region_bm; // Region BM to be verified
1537
BitMap* _card_bm; // Card BM to be verified
1538
bool _verbose; // verbose output?
1539
1540
BitMap* _exp_region_bm; // Expected Region BM values
1541
BitMap* _exp_card_bm; // Expected card BM values
1542
1543
int _failures;
1544
1545
public:
1546
VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1547
BitMap* region_bm,
1548
BitMap* card_bm,
1549
BitMap* exp_region_bm,
1550
BitMap* exp_card_bm,
1551
bool verbose) :
1552
_g1h(g1h), _cm(g1h->concurrent_mark()),
1553
_calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1554
_region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1555
_exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1556
_failures(0) { }
1557
1558
int failures() const { return _failures; }
1559
1560
bool doHeapRegion(HeapRegion* hr) {
1561
if (hr->continuesHumongous()) {
1562
// We will ignore these here and process them when their
1563
// associated "starts humongous" region is processed (see
1564
// set_bit_for_heap_region()). Note that we cannot rely on their
1565
// associated "starts humongous" region to have their bit set to
1566
// 1 since, due to the region chunking in the parallel region
1567
// iteration, a "continues humongous" region might be visited
1568
// before its associated "starts humongous".
1569
return false;
1570
}
1571
1572
int failures = 0;
1573
1574
// Call the CalcLiveObjectsClosure to walk the marking bitmap for
1575
// this region and set the corresponding bits in the expected region
1576
// and card bitmaps.
1577
bool res = _calc_cl.doHeapRegion(hr);
1578
assert(res == false, "should be continuing");
1579
1580
MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1581
Mutex::_no_safepoint_check_flag);
1582
1583
// Verify the marked bytes for this region.
1584
size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1585
size_t act_marked_bytes = hr->next_marked_bytes();
1586
1587
// We're not OK if expected marked bytes > actual marked bytes. It means
1588
// we have missed accounting for some objects during the actual marking.
1589
if (exp_marked_bytes > act_marked_bytes) {
1590
if (_verbose) {
1591
gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1592
"expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1593
hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1594
}
1595
failures += 1;
1596
}
1597
1598
// Verify the bit, for this region, in the actual and expected
1599
// (which was just calculated) region bit maps.
1600
// We're not OK if the bit in the calculated expected region
1601
// bitmap is set and the bit in the actual region bitmap is not.
1602
BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1603
1604
bool expected = _exp_region_bm->at(index);
1605
bool actual = _region_bm->at(index);
1606
if (expected && !actual) {
1607
if (_verbose) {
1608
gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1609
"expected: %s, actual: %s",
1610
hr->hrm_index(),
1611
BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1612
}
1613
failures += 1;
1614
}
1615
1616
// Verify that the card bit maps for the cards spanned by the current
1617
// region match. We have an error if we have a set bit in the expected
1618
// bit map and the corresponding bit in the actual bitmap is not set.
1619
1620
BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1621
BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1622
1623
for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1624
expected = _exp_card_bm->at(i);
1625
actual = _card_bm->at(i);
1626
1627
if (expected && !actual) {
1628
if (_verbose) {
1629
gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1630
"expected: %s, actual: %s",
1631
hr->hrm_index(), i,
1632
BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1633
}
1634
failures += 1;
1635
}
1636
}
1637
1638
if (failures > 0 && _verbose) {
1639
gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1640
"marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1641
HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1642
_calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1643
}
1644
1645
_failures += failures;
1646
1647
// We could stop iteration over the heap when we
1648
// find the first violating region by returning true.
1649
return false;
1650
}
1651
};
1652
1653
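// Gang task that runs VerifyLiveObjectDataHRClosure over the heap regions
// (chunked across workers when parallel GC threads are in use) and
// atomically accumulates the per-worker failure counts. Only used when
// VerifyDuringGC is enabled.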
class G1ParVerifyFinalCountTask: public AbstractGangTask {
1654
protected:
1655
G1CollectedHeap* _g1h;
1656
ConcurrentMark* _cm;
1657
BitMap* _actual_region_bm;
1658
BitMap* _actual_card_bm;
1659
1660
uint _n_workers;
1661
1662
BitMap* _expected_region_bm;
1663
BitMap* _expected_card_bm;
1664
1665
int _failures;
1666
bool _verbose;
1667
1668
public:
1669
G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1670
BitMap* region_bm, BitMap* card_bm,
1671
BitMap* expected_region_bm, BitMap* expected_card_bm)
1672
: AbstractGangTask("G1 verify final counting"),
1673
_g1h(g1h), _cm(_g1h->concurrent_mark()),
1674
_actual_region_bm(region_bm), _actual_card_bm(card_bm),
1675
_expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1676
_failures(0), _verbose(false),
1677
_n_workers(0) {
1678
assert(VerifyDuringGC, "don't call this otherwise");
1679
1680
// Use the value already set as the number of active threads
1681
// in the call to run_task().
1682
if (G1CollectedHeap::use_parallel_gc_threads()) {
1683
assert( _g1h->workers()->active_workers() > 0,
1684
"Should have been previously set");
1685
_n_workers = _g1h->workers()->active_workers();
1686
} else {
1687
_n_workers = 1;
1688
}
1689
1690
assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1691
assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1692
1693
_verbose = _cm->verbose_medium();
1694
}
1695
1696
void work(uint worker_id) {
1697
assert(worker_id < _n_workers, "invariant");
1698
1699
VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1700
_actual_region_bm, _actual_card_bm,
1701
_expected_region_bm,
1702
_expected_card_bm,
1703
_verbose);
1704
1705
if (G1CollectedHeap::use_parallel_gc_threads()) {
1706
_g1h->heap_region_par_iterate_chunked(&verify_cl,
1707
worker_id,
1708
_n_workers,
1709
HeapRegion::VerifyCountClaimValue);
1710
} else {
1711
_g1h->heap_region_iterate(&verify_cl);
1712
}
1713
1714
Atomic::add(verify_cl.failures(), &_failures);
1715
}
1716
1717
int failures() const { return _failures; }
1718
};
1719
1720
// Closure that finalizes the liveness counting data.
1721
// Used during the cleanup pause.
1722
// Sets the bits corresponding to the interval [NTAMS, top]
1723
// (which contains the implicitly live objects) in the
1724
// card liveness bitmap. Also sets the bit for each region,
1725
// containing live data, in the region liveness bitmap.
1726
1727
class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1728
public:
1729
FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1730
BitMap* region_bm,
1731
BitMap* card_bm) :
1732
CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1733
1734
bool doHeapRegion(HeapRegion* hr) {
1735
1736
if (hr->continuesHumongous()) {
1737
// We will ignore these here and process them when their
1738
// associated "starts humongous" region is processed (see
1739
// set_bit_for_heap_region()). Note that we cannot rely on their
1740
// associated "starts humongous" region to have their bit set to
1741
// 1 since, due to the region chunking in the parallel region
1742
// iteration, a "continues humongous" region might be visited
1743
// before its associated "starts humongous".
1744
return false;
1745
}
1746
1747
HeapWord* ntams = hr->next_top_at_mark_start();
1748
HeapWord* top = hr->top();
1749
1750
assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1751
1752
// Mark the allocated-since-marking portion...
1753
if (ntams < top) {
1754
// This definitely means the region has live objects.
1755
set_bit_for_region(hr);
1756
1757
// Now set the bits in the card bitmap for [ntams, top)
1758
BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1759
BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1760
1761
// Note: if we're looking at the last region in the heap, top
1762
// could actually be just beyond the end of the heap; end_idx
1763
// will then correspond to a (non-existent) card that is also
1764
// just beyond the heap.
1765
if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1766
// end of object is not card aligned - increment to cover
1767
// all the cards spanned by the object
1768
end_idx += 1;
1769
}
1770
1771
assert(end_idx <= _card_bm->size(),
1772
err_msg("oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1773
end_idx, _card_bm->size()));
1774
assert(start_idx < _card_bm->size(),
1775
err_msg("oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1776
start_idx, _card_bm->size()));
1777
1778
_cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1779
}
1780
1781
// Set the bit for the region if it contains live data
1782
if (hr->next_marked_bytes() > 0) {
1783
set_bit_for_region(hr);
1784
}
1785
1786
return false;
1787
}
1788
};
1789
1790
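// Gang task that applies FinalCountDataUpdateClosure to the heap regions
// during the cleanup pause, finalizing the region and card liveness
// counting data.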
class G1ParFinalCountTask: public AbstractGangTask {
1791
protected:
1792
G1CollectedHeap* _g1h;
1793
ConcurrentMark* _cm;
1794
BitMap* _actual_region_bm;
1795
BitMap* _actual_card_bm;
1796
1797
uint _n_workers;
1798
1799
public:
1800
G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1801
: AbstractGangTask("G1 final counting"),
1802
_g1h(g1h), _cm(_g1h->concurrent_mark()),
1803
_actual_region_bm(region_bm), _actual_card_bm(card_bm),
1804
_n_workers(0) {
1805
// Use the value already set as the number of active threads
1806
// in the call to run_task().
1807
if (G1CollectedHeap::use_parallel_gc_threads()) {
1808
assert( _g1h->workers()->active_workers() > 0,
1809
"Should have been previously set");
1810
_n_workers = _g1h->workers()->active_workers();
1811
} else {
1812
_n_workers = 1;
1813
}
1814
}
1815
1816
void work(uint worker_id) {
1817
assert(worker_id < _n_workers, "invariant");
1818
1819
FinalCountDataUpdateClosure final_update_cl(_g1h,
1820
_actual_region_bm,
1821
_actual_card_bm);
1822
1823
if (G1CollectedHeap::use_parallel_gc_threads()) {
1824
_g1h->heap_region_par_iterate_chunked(&final_update_cl,
1825
worker_id,
1826
_n_workers,
1827
HeapRegion::FinalCountClaimValue);
1828
} else {
1829
_g1h->heap_region_iterate(&final_update_cl);
1830
}
1831
}
1832
};
1833
1834
class G1ParNoteEndTask;
1835
1836
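// Per-worker closure applied to the heap regions at the end of marking.
// For each claimed region it resets the GC time stamps, records the
// end-of-marking information, frees non-young regions that turned out to
// be completely garbage (collecting them in a local cleanup list), and
// accumulates per-worker timing and liveness statistics.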
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1837
G1CollectedHeap* _g1;
1838
size_t _max_live_bytes;
1839
uint _regions_claimed;
1840
size_t _freed_bytes;
1841
FreeRegionList* _local_cleanup_list;
1842
HeapRegionSetCount _old_regions_removed;
1843
HeapRegionSetCount _humongous_regions_removed;
1844
HRRSCleanupTask* _hrrs_cleanup_task;
1845
double _claimed_region_time;
1846
double _max_region_time;
1847
1848
public:
1849
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1850
FreeRegionList* local_cleanup_list,
1851
HRRSCleanupTask* hrrs_cleanup_task) :
1852
_g1(g1),
1853
_max_live_bytes(0), _regions_claimed(0),
1854
_freed_bytes(0),
1855
_claimed_region_time(0.0), _max_region_time(0.0),
1856
_local_cleanup_list(local_cleanup_list),
1857
_old_regions_removed(),
1858
_humongous_regions_removed(),
1859
_hrrs_cleanup_task(hrrs_cleanup_task) { }
1860
1861
size_t freed_bytes() { return _freed_bytes; }
1862
const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1863
const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1864
1865
bool doHeapRegion(HeapRegion *hr) {
1866
if (hr->continuesHumongous()) {
1867
return false;
1868
}
1869
// We use a claim value of zero here because all regions
1870
// were claimed with value 1 in the FinalCount task.
1871
_g1->reset_gc_time_stamps(hr);
1872
double start = os::elapsedTime();
1873
_regions_claimed++;
1874
hr->note_end_of_marking();
1875
_max_live_bytes += hr->max_live_bytes();
1876
1877
if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1878
_freed_bytes += hr->used();
1879
hr->set_containing_set(NULL);
1880
if (hr->isHumongous()) {
1881
assert(hr->startsHumongous(), "we should only see starts humongous");
1882
_humongous_regions_removed.increment(1u, hr->capacity());
1883
_g1->free_humongous_region(hr, _local_cleanup_list, true);
1884
} else {
1885
_old_regions_removed.increment(1u, hr->capacity());
1886
_g1->free_region(hr, _local_cleanup_list, true);
1887
}
1888
} else {
1889
hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1890
}
1891
1892
double region_time = (os::elapsedTime() - start);
1893
_claimed_region_time += region_time;
1894
if (region_time > _max_region_time) {
1895
_max_region_time = region_time;
1896
}
1897
return false;
1898
}
1899
1900
size_t max_live_bytes() { return _max_live_bytes; }
1901
uint regions_claimed() { return _regions_claimed; }
1902
double claimed_region_time_sec() { return _claimed_region_time; }
1903
double max_region_time_sec() { return _max_region_time; }
1904
};
1905
1906
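// Gang task wrapping G1NoteEndOfConcMarkClosure. Each worker processes a
// chunk of the heap regions; the per-worker results are then merged under
// ParGCRareEvent_lock and the locally freed regions are appended to the
// global cleanup list. A rough usage sketch, mirroring the call site in
// ConcurrentMark::cleanup() below:
//
//   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
//   g1h->set_par_threads((int)n_workers);
//   g1h->workers()->run_task(&g1_par_note_end_task);
//   g1h->set_par_threads(0);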
class G1ParNoteEndTask: public AbstractGangTask {
1907
friend class G1NoteEndOfConcMarkClosure;
1908
1909
protected:
1910
G1CollectedHeap* _g1h;
1911
size_t _max_live_bytes;
1912
size_t _freed_bytes;
1913
FreeRegionList* _cleanup_list;
1914
1915
public:
1916
G1ParNoteEndTask(G1CollectedHeap* g1h,
1917
FreeRegionList* cleanup_list) :
1918
AbstractGangTask("G1 note end"), _g1h(g1h),
1919
_max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1920
1921
void work(uint worker_id) {
1922
double start = os::elapsedTime();
1923
FreeRegionList local_cleanup_list("Local Cleanup List");
1924
HRRSCleanupTask hrrs_cleanup_task;
1925
G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1926
&hrrs_cleanup_task);
1927
if (G1CollectedHeap::use_parallel_gc_threads()) {
1928
_g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1929
_g1h->workers()->active_workers(),
1930
HeapRegion::NoteEndClaimValue);
1931
} else {
1932
_g1h->heap_region_iterate(&g1_note_end);
1933
}
1934
assert(g1_note_end.complete(), "Shouldn't have yielded!");
1935
1936
// Now update the lists
1937
_g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1938
{
1939
MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1940
_g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1941
_max_live_bytes += g1_note_end.max_live_bytes();
1942
_freed_bytes += g1_note_end.freed_bytes();
1943
1944
// If we iterate over the global cleanup list at the end of
1945
// cleanup to do this printing we will not guarantee to only
1946
// generate output for the newly-reclaimed regions (the list
1947
// might not be empty at the beginning of cleanup; we might
1948
// still be working on its previous contents). So we do the
1949
// printing here, before we append the new regions to the global
1950
// cleanup list.
1951
1952
G1HRPrinter* hr_printer = _g1h->hr_printer();
1953
if (hr_printer->is_active()) {
1954
FreeRegionListIterator iter(&local_cleanup_list);
1955
while (iter.more_available()) {
1956
HeapRegion* hr = iter.get_next();
1957
hr_printer->cleanup(hr);
1958
}
1959
}
1960
1961
_cleanup_list->add_ordered(&local_cleanup_list);
1962
assert(local_cleanup_list.is_empty(), "post-condition");
1963
1964
HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1965
}
1966
}
1967
size_t max_live_bytes() { return _max_live_bytes; }
1968
size_t freed_bytes() { return _freed_bytes; }
1969
};
1970
1971
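// Gang task that scrubs the remembered sets (in parallel when enabled),
// using the region and card liveness bitmaps produced during marking.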
class G1ParScrubRemSetTask: public AbstractGangTask {
1972
protected:
1973
G1RemSet* _g1rs;
1974
BitMap* _region_bm;
1975
BitMap* _card_bm;
1976
public:
1977
G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1978
BitMap* region_bm, BitMap* card_bm) :
1979
AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1980
_region_bm(region_bm), _card_bm(card_bm) { }
1981
1982
void work(uint worker_id) {
1983
if (G1CollectedHeap::use_parallel_gc_threads()) {
1984
_g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1985
HeapRegion::ScrubRemSetClaimValue);
1986
} else {
1987
_g1rs->scrub(_region_bm, _card_bm);
1988
}
1989
}
1990
1991
};
1992
1993
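// The STW cleanup pause: finalizes the liveness counting data (verifying
// it when VerifyDuringGC is set), swaps the mark bitmaps, notes the end of
// marking in every region (freeing regions that are completely garbage),
// optionally scrubs the remembered sets, and updates the collector policy
// and statistics. The freed regions are handed over, via the cleanup list,
// to completeCleanup() which runs concurrently after this pause.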
void ConcurrentMark::cleanup() {
1994
// world is stopped at this checkpoint
1995
assert(SafepointSynchronize::is_at_safepoint(),
1996
"world should be stopped");
1997
G1CollectedHeap* g1h = G1CollectedHeap::heap();
1998
1999
// If a full collection has happened, we shouldn't do this.
2000
if (has_aborted()) {
2001
g1h->set_marking_complete(); // So bitmap clearing isn't confused
2002
return;
2003
}
2004
2005
g1h->verify_region_sets_optional();
2006
2007
if (VerifyDuringGC) {
2008
HandleMark hm; // handle scope
2009
Universe::heap()->prepare_for_verify();
2010
Universe::verify(VerifyOption_G1UsePrevMarking,
2011
" VerifyDuringGC:(before)");
2012
}
2013
g1h->check_bitmaps("Cleanup Start");
2014
2015
G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2016
g1p->record_concurrent_mark_cleanup_start();
2017
2018
double start = os::elapsedTime();
2019
2020
HeapRegionRemSet::reset_for_cleanup_tasks();
2021
2022
uint n_workers;
2023
2024
// Do counting once more with the world stopped for good measure.
2025
G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2026
2027
if (G1CollectedHeap::use_parallel_gc_threads()) {
2028
assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2029
"sanity check");
2030
2031
g1h->set_par_threads();
2032
n_workers = g1h->n_par_threads();
2033
assert(g1h->n_par_threads() == n_workers,
2034
"Should not have been reset");
2035
g1h->workers()->run_task(&g1_par_count_task);
2036
// Done with the parallel phase so reset to 0.
2037
g1h->set_par_threads(0);
2038
2039
assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2040
"sanity check");
2041
} else {
2042
n_workers = 1;
2043
g1_par_count_task.work(0);
2044
}
2045
2046
if (VerifyDuringGC) {
2047
// Verify that the counting data accumulated during marking matches
2048
// that calculated by walking the marking bitmap.
2049
2050
// Bitmaps to hold expected values
2051
BitMap expected_region_bm(_region_bm.size(), true);
2052
BitMap expected_card_bm(_card_bm.size(), true);
2053
2054
G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2055
&_region_bm,
2056
&_card_bm,
2057
&expected_region_bm,
2058
&expected_card_bm);
2059
2060
if (G1CollectedHeap::use_parallel_gc_threads()) {
2061
g1h->set_par_threads((int)n_workers);
2062
g1h->workers()->run_task(&g1_par_verify_task);
2063
// Done with the parallel phase so reset to 0.
2064
g1h->set_par_threads(0);
2065
2066
assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2067
"sanity check");
2068
} else {
2069
g1_par_verify_task.work(0);
2070
}
2071
2072
guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2073
}
2074
2075
size_t start_used_bytes = g1h->used();
2076
g1h->set_marking_complete();
2077
2078
double count_end = os::elapsedTime();
2079
double this_final_counting_time = (count_end - start);
2080
_total_counting_time += this_final_counting_time;
2081
2082
if (G1PrintRegionLivenessInfo) {
2083
G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2084
_g1h->heap_region_iterate(&cl);
2085
}
2086
2087
// Install newly created mark bitMap as "prev".
2088
swapMarkBitMaps();
2089
2090
g1h->reset_gc_time_stamp();
2091
2092
// Note end of marking in all heap regions.
2093
G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2094
if (G1CollectedHeap::use_parallel_gc_threads()) {
2095
g1h->set_par_threads((int)n_workers);
2096
g1h->workers()->run_task(&g1_par_note_end_task);
2097
g1h->set_par_threads(0);
2098
2099
assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2100
"sanity check");
2101
} else {
2102
g1_par_note_end_task.work(0);
2103
}
2104
g1h->check_gc_time_stamps();
2105
2106
if (!cleanup_list_is_empty()) {
2107
// The cleanup list is not empty, so we'll have to process it
2108
// concurrently. Notify anyone else that might be wanting free
2109
// regions that there will be more free regions coming soon.
2110
g1h->set_free_regions_coming();
2111
}
2112
2113
// We do the remembered set scrubbing (below) before the
2114
// record_concurrent_mark_cleanup_end() call further down, since the
// scrubbing affects the metric by which we sort the heap regions.
2115
if (G1ScrubRemSets) {
2116
double rs_scrub_start = os::elapsedTime();
2117
G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2118
if (G1CollectedHeap::use_parallel_gc_threads()) {
2119
g1h->set_par_threads((int)n_workers);
2120
g1h->workers()->run_task(&g1_par_scrub_rs_task);
2121
g1h->set_par_threads(0);
2122
2123
assert(g1h->check_heap_region_claim_values(
2124
HeapRegion::ScrubRemSetClaimValue),
2125
"sanity check");
2126
} else {
2127
g1_par_scrub_rs_task.work(0);
2128
}
2129
2130
double rs_scrub_end = os::elapsedTime();
2131
double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2132
_total_rs_scrub_time += this_rs_scrub_time;
2133
}
2134
2135
// this will also free any regions totally full of garbage objects,
2136
// and sort the regions.
2137
g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2138
2139
// Statistics.
2140
double end = os::elapsedTime();
2141
_cleanup_times.add((end - start) * 1000.0);
2142
2143
if (G1Log::fine()) {
2144
g1h->print_size_transition(gclog_or_tty,
2145
start_used_bytes,
2146
g1h->used(),
2147
g1h->capacity());
2148
}
2149
2150
// Clean up will have freed any regions completely full of garbage.
2151
// Update the soft reference policy with the new heap occupancy.
2152
Universe::update_heap_info_at_gc();
2153
2154
if (VerifyDuringGC) {
2155
HandleMark hm; // handle scope
2156
Universe::heap()->prepare_for_verify();
2157
Universe::verify(VerifyOption_G1UsePrevMarking,
2158
" VerifyDuringGC:(after)");
2159
}
2160
g1h->check_bitmaps("Cleanup End");
2161
2162
g1h->verify_region_sets_optional();
2163
2164
// We need to make this be a "collection" so any collection pause that
2165
// races with it goes around and waits for completeCleanup to finish.
2166
g1h->increment_total_collections();
2167
2168
// Clean out dead classes and update Metaspace sizes.
2169
if (ClassUnloadingWithConcurrentMark) {
2170
ClassLoaderDataGraph::purge();
2171
}
2172
MetaspaceGC::compute_new_size();
2173
2174
// We reclaimed old regions so we should calculate the sizes to make
2175
// sure we update the old gen/space data.
2176
g1h->g1mm()->update_sizes();
2177
g1h->allocation_context_stats().update_after_mark();
2178
2179
g1h->trace_heap_after_concurrent_cycle();
2180
}
2181
2182
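// Concurrent continuation of cleanup(): drains the cleanup list, clearing
// each region and moving regions onto the secondary free list in batches
// of G1SecondaryFreeListAppendLength to limit locking and notify_all()
// traffic on SecondaryFreeList_lock.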
void ConcurrentMark::completeCleanup() {
2183
if (has_aborted()) return;
2184
2185
G1CollectedHeap* g1h = G1CollectedHeap::heap();
2186
2187
_cleanup_list.verify_optional();
2188
FreeRegionList tmp_free_list("Tmp Free List");
2189
2190
if (G1ConcRegionFreeingVerbose) {
2191
gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2192
"cleanup list has %u entries",
2193
_cleanup_list.length());
2194
}
2195
2196
// No one else should be accessing the _cleanup_list at this point,
2197
// so it is not necessary to take any locks
2198
while (!_cleanup_list.is_empty()) {
2199
HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2200
assert(hr != NULL, "Got NULL from a non-empty list");
2201
hr->par_clear();
2202
tmp_free_list.add_ordered(hr);
2203
2204
// Instead of adding one region at a time to the secondary_free_list,
2205
// we accumulate them in the local list and move them a few at a
2206
// time. This also cuts down on the number of notify_all() calls
2207
// we do during this process. We'll also append the local list when
2208
// _cleanup_list is empty (which means we just removed the last
2209
// region from the _cleanup_list).
2210
if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2211
_cleanup_list.is_empty()) {
2212
if (G1ConcRegionFreeingVerbose) {
2213
gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2214
"appending %u entries to the secondary_free_list, "
2215
"cleanup list still has %u entries",
2216
tmp_free_list.length(),
2217
_cleanup_list.length());
2218
}
2219
2220
{
2221
MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2222
g1h->secondary_free_list_add(&tmp_free_list);
2223
SecondaryFreeList_lock->notify_all();
2224
}
2225
2226
if (G1StressConcRegionFreeing) {
2227
for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2228
os::sleep(Thread::current(), (jlong) 1, false);
2229
}
2230
}
2231
}
2232
}
2233
assert(tmp_free_list.is_empty(), "post-condition");
2234
}
2235
2236
// Supporting Object and Oop closures for reference discovery
2237
// and processing during marking
2238
2239
bool G1CMIsAliveClosure::do_object_b(oop obj) {
2240
HeapWord* addr = (HeapWord*)obj;
2241
return addr != NULL &&
2242
(!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2243
}
2244
2245
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
2246
// Uses the CMTask associated with a worker thread (for serial reference
2247
// processing the CMTask for worker 0 is used) to preserve (mark) and
2248
// trace referent objects.
2249
//
2250
// Using the CMTask and embedded local queues avoids having the worker
2251
// threads operating on the global mark stack. This reduces the risk
2252
// of overflowing the stack - which we would rather avoid at this late
2253
// stage. Also using the tasks' local queues removes the potential
2254
// of the workers interfering with each other that could occur if
2255
// operating on the global stack.
2256
2257
class G1CMKeepAliveAndDrainClosure: public OopClosure {
2258
ConcurrentMark* _cm;
2259
CMTask* _task;
2260
int _ref_counter_limit;
2261
int _ref_counter;
2262
bool _is_serial;
2263
public:
2264
G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2265
_cm(cm), _task(task), _is_serial(is_serial),
2266
_ref_counter_limit(G1RefProcDrainInterval) {
2267
assert(_ref_counter_limit > 0, "sanity");
2268
assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2269
_ref_counter = _ref_counter_limit;
2270
}
2271
2272
virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2273
virtual void do_oop( oop* p) { do_oop_work(p); }
2274
2275
template <class T> void do_oop_work(T* p) {
2276
if (!_cm->has_overflown()) {
2277
oop obj = oopDesc::load_decode_heap_oop(p);
2278
if (_cm->verbose_high()) {
2279
gclog_or_tty->print_cr("\t[%u] we're looking at location "
2280
"*" PTR_FORMAT " = " PTR_FORMAT,
2281
_task->worker_id(), p2i(p), p2i((void*) obj));
2282
}
2283
2284
_task->deal_with_reference(obj);
2285
_ref_counter--;
2286
2287
if (_ref_counter == 0) {
2288
// We have dealt with _ref_counter_limit references, pushing them
2289
// and objects reachable from them on to the local stack (and
2290
// possibly the global stack). Call CMTask::do_marking_step() to
2291
// process these entries.
2292
//
2293
// We call CMTask::do_marking_step() in a loop, which we'll exit if
2294
// there's nothing more to do (i.e. we're done with the entries that
2295
// were pushed as a result of the CMTask::deal_with_reference() calls
2296
// above) or we overflow.
2297
//
2298
// Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2299
// flag while there may still be some work to do. (See the comment at
2300
// the beginning of CMTask::do_marking_step() for those conditions -
2301
// one of which is reaching the specified time target.) It is only
2302
// when CMTask::do_marking_step() returns without setting the
2303
// has_aborted() flag that the marking step has completed.
2304
do {
2305
double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2306
_task->do_marking_step(mark_step_duration_ms,
2307
false /* do_termination */,
2308
_is_serial);
2309
} while (_task->has_aborted() && !_cm->has_overflown());
2310
_ref_counter = _ref_counter_limit;
2311
}
2312
} else {
2313
if (_cm->verbose_high()) {
2314
gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2315
}
2316
}
2317
}
2318
};
2319
2320
// 'Drain' oop closure used by both serial and parallel reference processing.
2321
// Uses the CMTask associated with a given worker thread (for serial
2322
// reference processing the CMTask for worker 0 is used). Calls the
2323
// do_marking_step routine, with an unbelievably large timeout value,
2324
// to drain the marking data structures of the remaining entries
2325
// added by the 'keep alive' oop closure above.
2326
2327
class G1CMDrainMarkingStackClosure: public VoidClosure {
2328
ConcurrentMark* _cm;
2329
CMTask* _task;
2330
bool _is_serial;
2331
public:
2332
G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2333
_cm(cm), _task(task), _is_serial(is_serial) {
2334
assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2335
}
2336
2337
void do_void() {
2338
do {
2339
if (_cm->verbose_high()) {
2340
gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2341
_task->worker_id(), BOOL_TO_STR(_is_serial));
2342
}
2343
2344
// We call CMTask::do_marking_step() to completely drain the local
2345
// and global marking stacks of entries pushed by the 'keep alive'
2346
// oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2347
//
2348
// CMTask::do_marking_step() is called in a loop, which we'll exit
2349
// if there's nothing more to do (i.e. we've completely drained the
2350
// entries that were pushed as a result of applying the 'keep alive'
2351
// closure to the entries on the discovered ref lists) or we overflow
2352
// the global marking stack.
2353
//
2354
// Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2355
// flag while there may still be some work to do. (See the comment at
2356
// the beginning of CMTask::do_marking_step() for those conditions -
2357
// one of which is reaching the specified time target.) It is only
2358
// when CMTask::do_marking_step() returns without setting the
2359
// has_aborted() flag that the marking step has completed.
2360
2361
_task->do_marking_step(1000000000.0 /* something very large */,
2362
true /* do_termination */,
2363
_is_serial);
2364
} while (_task->has_aborted() && !_cm->has_overflown());
2365
}
2366
};
2367
2368
// Implementation of AbstractRefProcTaskExecutor for parallel
2369
// reference processing at the end of G1 concurrent marking
2370
2371
class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2372
private:
2373
G1CollectedHeap* _g1h;
2374
ConcurrentMark* _cm;
2375
WorkGang* _workers;
2376
int _active_workers;
2377
2378
public:
2379
G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2380
ConcurrentMark* cm,
2381
WorkGang* workers,
2382
int n_workers) :
2383
_g1h(g1h), _cm(cm),
2384
_workers(workers), _active_workers(n_workers) { }
2385
2386
// Executes the given task using concurrent marking worker threads.
2387
virtual void execute(ProcessTask& task);
2388
virtual void execute(EnqueueTask& task);
2389
};
2390
2391
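// Gang task wrapper that runs a reference ProcessTask on the concurrent
// marking worker threads; each worker uses its own CMTask together with
// per-worker is-alive, keep-alive and drain closures.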
class G1CMRefProcTaskProxy: public AbstractGangTask {
2392
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2393
ProcessTask& _proc_task;
2394
G1CollectedHeap* _g1h;
2395
ConcurrentMark* _cm;
2396
2397
public:
2398
G1CMRefProcTaskProxy(ProcessTask& proc_task,
2399
G1CollectedHeap* g1h,
2400
ConcurrentMark* cm) :
2401
AbstractGangTask("Process reference objects in parallel"),
2402
_proc_task(proc_task), _g1h(g1h), _cm(cm) {
2403
ReferenceProcessor* rp = _g1h->ref_processor_cm();
2404
assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2405
}
2406
2407
virtual void work(uint worker_id) {
2408
ResourceMark rm;
2409
HandleMark hm;
2410
CMTask* task = _cm->task(worker_id);
2411
G1CMIsAliveClosure g1_is_alive(_g1h);
2412
G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2413
G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2414
2415
_proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2416
}
2417
};
2418
2419
void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2420
assert(_workers != NULL, "Need parallel worker threads.");
2421
assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2422
2423
G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2424
2425
// We need to reset the concurrency level before each
2426
// proxy task execution, so that the termination protocol
2427
// and overflow handling in CMTask::do_marking_step() knows
2428
// how many workers to wait for.
2429
_cm->set_concurrency(_active_workers);
2430
_g1h->set_par_threads(_active_workers);
2431
_workers->run_task(&proc_task_proxy);
2432
_g1h->set_par_threads(0);
2433
}
2434
2435
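// Gang task wrapper that runs a reference EnqueueTask on the concurrent
// marking worker threads.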
class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2436
typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2437
EnqueueTask& _enq_task;
2438
2439
public:
2440
G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2441
AbstractGangTask("Enqueue reference objects in parallel"),
2442
_enq_task(enq_task) { }
2443
2444
virtual void work(uint worker_id) {
2445
_enq_task.work(worker_id);
2446
}
2447
};
2448
2449
void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2450
assert(_workers != NULL, "Need parallel worker threads.");
2451
assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2452
2453
G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2454
2455
// Not strictly necessary but...
2456
//
2457
// We need to reset the concurrency level before each
2458
// proxy task execution, so that the termination protocol
2459
// and overflow handling in CMTask::do_marking_step() knows
2460
// how many workers to wait for.
2461
_cm->set_concurrency(_active_workers);
2462
_g1h->set_par_threads(_active_workers);
2463
_workers->run_task(&enq_task_proxy);
2464
_g1h->set_par_threads(0);
2465
}
2466
2467
void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2468
G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2469
}
2470
2471
// Helper class to get rid of some boilerplate code.
2472
class G1RemarkGCTraceTime : public GCTraceTime {
2473
static bool doit_and_prepend(bool doit) {
2474
if (doit) {
2475
gclog_or_tty->put(' ');
2476
}
2477
return doit;
2478
}
2479
2480
public:
2481
G1RemarkGCTraceTime(const char* title, bool doit)
2482
: GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2483
G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2484
}
2485
};
2486
2487
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2488
if (has_overflown()) {
2489
// Skip processing the discovered references if we have
2490
// overflown the global marking stack. Reference objects
2491
// only get discovered once so it is OK to not
2492
// de-populate the discovered reference lists. We could have,
2493
// but the only benefit would be that, when marking restarts,
2494
// fewer reference objects are discovered.
2495
return;
2496
}
2497
2498
ResourceMark rm;
2499
HandleMark hm;
2500
2501
G1CollectedHeap* g1h = G1CollectedHeap::heap();
2502
2503
// Is alive closure.
2504
G1CMIsAliveClosure g1_is_alive(g1h);
2505
2506
// Inner scope to exclude the cleaning of the string and symbol
2507
// tables from the displayed time.
2508
{
2509
if (G1Log::finer()) {
2510
gclog_or_tty->put(' ');
2511
}
2512
GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2513
2514
ReferenceProcessor* rp = g1h->ref_processor_cm();
2515
2516
// See the comment in G1CollectedHeap::ref_processing_init()
2517
// about how reference processing currently works in G1.
2518
2519
// Set the soft reference policy
2520
rp->setup_policy(clear_all_soft_refs);
2521
assert(_markStack.isEmpty(), "mark stack should be empty");
2522
2523
// Instances of the 'Keep Alive' and 'Complete GC' closures used
2524
// in serial reference processing. Note these closures are also
2525
// used for serially processing (by the current thread) the
2526
// JNI references during parallel reference processing.
2527
//
2528
// These closures do not need to synchronize with the worker
2529
// threads involved in parallel reference processing as these
2530
// instances are executed serially by the current thread (e.g.
2531
// reference processing is not multi-threaded and is thus
2532
// performed by the current thread instead of a gang worker).
2533
//
2534
// The gang tasks involved in parallel reference processing create
2535
// their own instances of these closures, which do their own
2536
// synchronization among themselves.
2537
G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2538
G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2539
2540
// We need at least one active thread. If reference processing
2541
// is not multi-threaded we use the current (VMThread) thread,
2542
// otherwise we use the work gang from the G1CollectedHeap and
2543
// we utilize all the worker threads we can.
2544
bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2545
uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2546
active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2547
2548
// Parallel processing task executor.
2549
G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2550
g1h->workers(), active_workers);
2551
AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2552
2553
// Set the concurrency level. The phase was already set prior to
2554
// executing the remark task.
2555
set_concurrency(active_workers);
2556
2557
// Set the degree of MT processing here. If the discovery was done MT,
2558
// the number of threads involved during discovery could differ from
2559
// the number of active workers. This is OK as long as the discovered
2560
// Reference lists are balanced (see balance_all_queues() and balance_queues()).
2561
rp->set_active_mt_degree(active_workers);
2562
2563
// Process the weak references.
2564
const ReferenceProcessorStats& stats =
2565
rp->process_discovered_references(&g1_is_alive,
2566
&g1_keep_alive,
2567
&g1_drain_mark_stack,
2568
executor,
2569
g1h->gc_timer_cm(),
2570
concurrent_gc_id());
2571
g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2572
2573
// The do_oop work routines of the keep_alive and drain_marking_stack
2574
// oop closures will set the has_overflown flag if we overflow the
2575
// global marking stack.
2576
2577
assert(_markStack.overflow() || _markStack.isEmpty(),
2578
"mark stack should be empty (unless it overflowed)");
2579
2580
if (_markStack.overflow()) {
2581
// This should have been done already when we tried to push an
2582
// entry on to the global mark stack. But let's do it again.
2583
set_has_overflown();
2584
}
2585
2586
assert(rp->num_q() == active_workers, "why not");
2587
2588
rp->enqueue_discovered_references(executor);
2589
2590
rp->verify_no_references_recorded();
2591
assert(!rp->discovery_enabled(), "Post condition");
2592
}
2593
2594
if (has_overflown()) {
2595
// We can not trust g1_is_alive if the marking stack overflowed
2596
return;
2597
}
2598
2599
assert(_markStack.isEmpty(), "Marking should have completed");
2600
2601
// Unload Klasses, String, Symbols, Code Cache, etc.
2602
{
2603
G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2604
2605
if (ClassUnloadingWithConcurrentMark) {
2606
// Cleaning of klasses depends on correct information from MetadataOnStackMark. The CodeCache::mark_on_stack
2607
// part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
2608
// Defer the cleaning until we have complete on_stack data.
2609
MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
2610
2611
bool purged_classes;
2612
2613
{
2614
G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2615
purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2616
}
2617
2618
{
2619
G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2620
weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2621
}
2622
2623
{
2624
G1RemarkGCTraceTime trace("Deallocate Metadata", G1Log::finest());
2625
ClassLoaderDataGraph::free_deallocate_lists();
2626
}
2627
}
2628
2629
if (G1StringDedup::is_enabled()) {
2630
G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2631
G1StringDedup::unlink(&g1_is_alive);
2632
}
2633
}
2634
}
2635
2636
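// Called during the cleanup pause: the bitmap populated by the marking
// cycle that just completed becomes the "prev" bitmap, while the old
// "prev" bitmap becomes the "next" bitmap for the following cycle.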
void ConcurrentMark::swapMarkBitMaps() {
2637
CMBitMapRO* temp = _prevMarkBitMap;
2638
_prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2639
_nextMarkBitMap = (CMBitMap*) temp;
2640
}
2641
2642
// Closure for marking entries in SATB buffers.
2643
class CMSATBBufferClosure : public SATBBufferClosure {
2644
private:
2645
CMTask* _task;
2646
G1CollectedHeap* _g1h;
2647
2648
// This is very similar to CMTask::deal_with_reference, but with
2649
// more relaxed requirements for the argument, so this must be more
2650
// circumspect about treating the argument as an object.
2651
void do_entry(void* entry) const {
2652
_task->increment_refs_reached();
2653
HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2654
if (entry < hr->next_top_at_mark_start()) {
2655
// Until we get here, we don't know whether entry refers to a valid
2656
// object; it could instead have been a stale reference.
2657
oop obj = static_cast<oop>(entry);
2658
assert(obj->is_oop(true /* ignore mark word */),
2659
err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2660
_task->make_reference_grey(obj, hr);
2661
}
2662
}
2663
2664
public:
2665
CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2666
: _task(task), _g1h(g1h) { }
2667
2668
virtual void do_buffer(void** buffer, size_t size) {
2669
for (size_t i = 0; i < size; ++i) {
2670
do_entry(buffer[i]);
2671
}
2672
}
2673
};
2674
2675
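// Thread closure used during remark. For each Java thread it claims, it
// applies the marking closure to the oops in the thread's nmethods and
// drains the thread's SATB buffer into the marking task; for the VM
// thread it drains the shared SATB queue.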
class G1RemarkThreadsClosure : public ThreadClosure {
2676
CMSATBBufferClosure _cm_satb_cl;
2677
G1CMOopClosure _cm_cl;
2678
MarkingCodeBlobClosure _code_cl;
2679
int _thread_parity;
2680
bool _is_par;
2681
2682
public:
2683
G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2684
_cm_satb_cl(task, g1h),
2685
_cm_cl(g1h, g1h->concurrent_mark(), task),
2686
_code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2687
_thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2688
2689
void do_thread(Thread* thread) {
2690
if (thread->is_Java_thread()) {
2691
if (thread->claim_oops_do(_is_par, _thread_parity)) {
2692
JavaThread* jt = (JavaThread*)thread;
2693
2694
// In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2695
// however, the oops reachable from nmethods have very complex lifecycles:
2696
// * Alive if on the stack of an executing method
2697
// * Weakly reachable otherwise
2698
// Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
2699
// live by the SATB invariant but other oops recorded in nmethods may behave differently.
2700
jt->nmethods_do(&_code_cl);
2701
2702
jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2703
}
2704
} else if (thread->is_VM_thread()) {
2705
if (thread->claim_oops_do(_is_par, _thread_parity)) {
2706
JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2707
}
2708
}
2709
}
2710
};
2711
2712
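// Gang task used for the remark phase. Each worker first scans the
// threads via G1RemarkThreadsClosure (SATB buffers and nmethod roots) and
// then repeatedly calls do_marking_step() with a very large time bound
// until marking completes or the global mark stack overflows.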
class CMRemarkTask: public AbstractGangTask {
2713
private:
2714
ConcurrentMark* _cm;
2715
bool _is_serial;
2716
public:
2717
void work(uint worker_id) {
2718
// Since all available tasks are actually started, we should
2719
// only proceed if we're supposed to be active.
2720
if (worker_id < _cm->active_tasks()) {
2721
CMTask* task = _cm->task(worker_id);
2722
task->record_start_time();
2723
{
2724
ResourceMark rm;
2725
HandleMark hm;
2726
2727
G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2728
Threads::threads_do(&threads_f);
2729
}
2730
2731
do {
2732
task->do_marking_step(1000000000.0 /* something very large */,
2733
true /* do_termination */,
2734
_is_serial);
2735
} while (task->has_aborted() && !_cm->has_overflown());
2736
// If we overflow, then we do not want to restart. We instead
2737
// want to abort remark and do concurrent marking again.
2738
task->record_end_time();
2739
}
2740
}
2741
2742
CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2743
AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2744
_cm->terminator()->reset_for_reuse(active_workers);
2745
}
2746
};
2747
2748
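// The work done inside the remark pause: make the heap parsable, run
// CMRemarkTask (in parallel if a work gang is available, otherwise
// serially on the VM thread), and then check that all completed SATB
// buffers have been processed unless the marking stack overflowed.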
void ConcurrentMark::checkpointRootsFinalWork() {
2749
ResourceMark rm;
2750
HandleMark hm;
2751
G1CollectedHeap* g1h = G1CollectedHeap::heap();
2752
2753
G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2754
2755
g1h->ensure_parsability(false);
2756
2757
if (G1CollectedHeap::use_parallel_gc_threads()) {
2758
G1CollectedHeap::StrongRootsScope srs(g1h);
2759
// this is remark, so we'll use up all active threads
2760
uint active_workers = g1h->workers()->active_workers();
2761
if (active_workers == 0) {
2762
assert(active_workers > 0, "Should have been set earlier");
2763
active_workers = (uint) ParallelGCThreads;
2764
g1h->workers()->set_active_workers(active_workers);
2765
}
2766
set_concurrency_and_phase(active_workers, false /* concurrent */);
2767
// Leave _parallel_marking_threads at its
2768
// value originally calculated in the ConcurrentMark
2769
// constructor and pass values of the active workers
2770
// through the gang in the task.
2771
2772
CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2773
// We will start all available threads, even if we decide that the
2774
// active_workers will be fewer. The extra ones will just bail out
2775
// immediately.
2776
g1h->set_par_threads(active_workers);
2777
g1h->workers()->run_task(&remarkTask);
2778
g1h->set_par_threads(0);
2779
} else {
2780
G1CollectedHeap::StrongRootsScope srs(g1h);
2781
uint active_workers = 1;
2782
set_concurrency_and_phase(active_workers, false /* concurrent */);
2783
2784
// Note - if there's no work gang then the VMThread will be
2785
// the thread to execute the remark - serially. We have
2786
// to pass true for the is_serial parameter so that
2787
// CMTask::do_marking_step() doesn't enter the sync
2788
// barriers in the event of an overflow. Doing so will
2789
// cause an assert that the current thread is not a
2790
// concurrent GC thread.
2791
CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2792
remarkTask.work(0);
2793
}
2794
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2795
guarantee(has_overflown() ||
2796
satb_mq_set.completed_buffers_num() == 0,
2797
err_msg("Invariant: has_overflown = %s, num buffers = %d",
2798
BOOL_TO_STR(has_overflown()),
2799
satb_mq_set.completed_buffers_num()));
2800
2801
print_stats();
2802
}
2803
2804
#ifndef PRODUCT
2805
2806
class PrintReachableOopClosure: public OopClosure {
2807
private:
2808
G1CollectedHeap* _g1h;
2809
outputStream* _out;
2810
VerifyOption _vo;
2811
bool _all;
2812
2813
public:
2814
PrintReachableOopClosure(outputStream* out,
2815
VerifyOption vo,
2816
bool all) :
2817
_g1h(G1CollectedHeap::heap()),
2818
_out(out), _vo(vo), _all(all) { }
2819
2820
void do_oop(narrowOop* p) { do_oop_work(p); }
2821
void do_oop( oop* p) { do_oop_work(p); }
2822
2823
template <class T> void do_oop_work(T* p) {
2824
oop obj = oopDesc::load_decode_heap_oop(p);
2825
const char* str = NULL;
2826
const char* str2 = "";
2827
2828
if (obj == NULL) {
2829
str = "";
2830
} else if (!_g1h->is_in_g1_reserved(obj)) {
2831
str = " O";
2832
} else {
2833
HeapRegion* hr = _g1h->heap_region_containing(obj);
2834
bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2835
bool marked = _g1h->is_marked(obj, _vo);
2836
2837
if (over_tams) {
2838
str = " >";
2839
if (marked) {
2840
str2 = " AND MARKED";
2841
}
2842
} else if (marked) {
2843
str = " M";
2844
} else {
2845
str = " NOT";
2846
}
2847
}
2848
2849
_out->print_cr(" " PTR_FORMAT ": " PTR_FORMAT "%s%s",
2850
p2i(p), p2i((void*) obj), str, str2);
2851
}
2852
};
2853
2854
class PrintReachableObjectClosure : public ObjectClosure {
2855
private:
2856
G1CollectedHeap* _g1h;
2857
outputStream* _out;
2858
VerifyOption _vo;
2859
bool _all;
2860
HeapRegion* _hr;
2861
2862
public:
2863
PrintReachableObjectClosure(outputStream* out,
2864
VerifyOption vo,
2865
bool all,
2866
HeapRegion* hr) :
2867
_g1h(G1CollectedHeap::heap()),
2868
_out(out), _vo(vo), _all(all), _hr(hr) { }
2869
2870
void do_object(oop o) {
2871
bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2872
bool marked = _g1h->is_marked(o, _vo);
2873
bool print_it = _all || over_tams || marked;
2874
2875
if (print_it) {
2876
_out->print_cr(" " PTR_FORMAT "%s",
2877
p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2878
PrintReachableOopClosure oopCl(_out, _vo, _all);
2879
o->oop_iterate_no_header(&oopCl);
2880
}
2881
}
2882
};
2883
2884
class PrintReachableRegionClosure : public HeapRegionClosure {
2885
private:
2886
G1CollectedHeap* _g1h;
2887
outputStream* _out;
2888
VerifyOption _vo;
2889
bool _all;
2890
2891
public:
2892
bool doHeapRegion(HeapRegion* hr) {
2893
HeapWord* b = hr->bottom();
2894
HeapWord* e = hr->end();
2895
HeapWord* t = hr->top();
2896
HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2897
_out->print_cr("** [" PTR_FORMAT ", " PTR_FORMAT "] top: " PTR_FORMAT " "
2898
"TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2899
_out->cr();
2900
2901
HeapWord* from = b;
2902
HeapWord* to = t;
2903
2904
if (to > from) {
2905
_out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2906
_out->cr();
2907
PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2908
hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2909
_out->cr();
2910
}
2911
2912
return false;
2913
}
2914
2915
PrintReachableRegionClosure(outputStream* out,
2916
VerifyOption vo,
2917
bool all) :
2918
_g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2919
};
2920
2921
void ConcurrentMark::print_reachable(const char* str,
2922
VerifyOption vo,
2923
bool all) {
2924
gclog_or_tty->cr();
2925
gclog_or_tty->print_cr("== Doing heap dump... ");
2926
2927
if (G1PrintReachableBaseFile == NULL) {
2928
gclog_or_tty->print_cr(" #### error: no base file defined");
2929
return;
2930
}
2931
2932
if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2933
(JVM_MAXPATHLEN - 1)) {
2934
gclog_or_tty->print_cr(" #### error: file name too long");
2935
return;
2936
}
2937
2938
char file_name[JVM_MAXPATHLEN];
2939
sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2940
gclog_or_tty->print_cr(" dumping to file %s", file_name);
2941
2942
fileStream fout(file_name);
2943
if (!fout.is_open()) {
2944
gclog_or_tty->print_cr(" #### error: could not open file");
2945
return;
2946
}
2947
2948
outputStream* out = &fout;
2949
out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2950
out->cr();
2951
2952
out->print_cr("--- ITERATING OVER REGIONS");
2953
out->cr();
2954
PrintReachableRegionClosure rcl(out, vo, all);
2955
_g1h->heap_region_iterate(&rcl);
2956
out->cr();
2957
2958
gclog_or_tty->print_cr(" done");
2959
gclog_or_tty->flush();
2960
}
2961
2962
#endif // PRODUCT
2963
2964
void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2965
// Note we are overriding the read-only view of the prev map here, via
2966
// the cast.
2967
((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2968
}
2969
2970
void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2971
_nextMarkBitMap->clearRange(mr);
2972
}
2973
2974
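// Claims the next region for a marking task by CAS-ing the global finger
// past it. Returns the claimed region if it has something to scan
// (i.e. its NTAMS is above bottom); returns NULL either when the claimed
// region is empty (the caller should simply call claim_region() again) or
// when the finger has reached the end of the heap.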
HeapRegion*
2975
ConcurrentMark::claim_region(uint worker_id) {
2976
// "checkpoint" the finger
2977
HeapWord* finger = _finger;
2978
2979
// _heap_end will not change underneath our feet; it only changes at
2980
// yield points.
2981
while (finger < _heap_end) {
2982
assert(_g1h->is_in_g1_reserved(finger), "invariant");
2983
2984
// Note on how this code handles humongous regions. In the
2985
// normal case the finger will reach the start of a "starts
2986
// humongous" (SH) region. Its end will either be the end of the
2987
// last "continues humongous" (CH) region in the sequence, or the
2988
// standard end of the SH region (if the SH is the only region in
2989
// the sequence). That way claim_region() will skip over the CH
2990
// regions. However, there is a subtle race between a CM thread
2991
// executing this method and a mutator thread doing a humongous
2992
// object allocation. The two are not mutually exclusive as the CM
2993
// thread does not need to hold the Heap_lock when it gets
2994
// here. So there is a chance that claim_region() will come across
2995
// a free region that's in the process of becoming a SH or a CH
2996
// region. In the former case, it will either
2997
// a) Miss the update to the region's end, in which case it will
2998
// visit every subsequent CH region, will find their bitmaps
2999
// empty, and do nothing, or
3000
// b) Will observe the update of the region's end (in which case
3001
// it will skip the subsequent CH regions).
3002
// If it comes across a region that suddenly becomes CH, the
3003
// scenario will be similar to b). So, the race between
3004
// claim_region() and a humongous object allocation might force us
3005
// to do a bit of unnecessary work (due to some unnecessary bitmap
3006
// iterations) but it should not introduce any correctness issues.
3007
HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
3008
3009
// The heap_region_containing_raw() call above may return NULL since we always scan and claim
3010
// until the end of the heap. In this case, just jump to the next region.
3011
HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
3012
3013
// Is the gap between reading the finger and doing the CAS too long?
3014
HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
3015
if (res == finger && curr_region != NULL) {
3016
// we succeeded
3017
HeapWord* bottom = curr_region->bottom();
3018
HeapWord* limit = curr_region->next_top_at_mark_start();
3019
3020
if (verbose_low()) {
3021
gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
3022
"[" PTR_FORMAT ", " PTR_FORMAT "), "
3023
"limit = " PTR_FORMAT,
3024
worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
3025
}
3026
3027
// notice that _finger == end cannot be guaranteed here since
3028
// someone else might have moved the finger even further
3029
assert(_finger >= end, "the finger should have moved forward");
3030
3031
if (verbose_low()) {
3032
gclog_or_tty->print_cr("[%u] we were successful with region = "
3033
PTR_FORMAT, worker_id, p2i(curr_region));
3034
}
3035
3036
if (limit > bottom) {
3037
if (verbose_low()) {
3038
gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
3039
"returning it ", worker_id, p2i(curr_region));
3040
}
3041
return curr_region;
3042
} else {
3043
assert(limit == bottom,
3044
"the region limit should be at bottom");
3045
if (verbose_low()) {
3046
gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
3047
"returning NULL", worker_id, p2i(curr_region));
3048
}
3049
// we return NULL and the caller should try calling
3050
// claim_region() again.
3051
return NULL;
3052
}
3053
} else {
3054
assert(_finger > finger, "the finger should have moved forward");
3055
if (verbose_low()) {
3056
if (curr_region == NULL) {
3057
gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3058
"global finger = " PTR_FORMAT ", "
3059
"our finger = " PTR_FORMAT,
3060
worker_id, p2i(_finger), p2i(finger));
3061
} else {
3062
gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3063
"global finger = " PTR_FORMAT ", "
3064
"our finger = " PTR_FORMAT,
3065
worker_id, p2i(_finger), p2i(finger));
3066
}
3067
}
3068
3069
// read it again
3070
finger = _finger;
3071
}
3072
}
3073
3074
return NULL;
3075
}
3076
3077
#ifndef PRODUCT
3078
enum VerifyNoCSetOopsPhase {
3079
VerifyNoCSetOopsStack,
3080
VerifyNoCSetOopsQueues
3081
};
3082
3083
class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3084
private:
3085
G1CollectedHeap* _g1h;
3086
VerifyNoCSetOopsPhase _phase;
3087
int _info;
3088
3089
const char* phase_str() {
3090
switch (_phase) {
3091
case VerifyNoCSetOopsStack: return "Stack";
3092
case VerifyNoCSetOopsQueues: return "Queue";
3093
default: ShouldNotReachHere();
3094
}
3095
return NULL;
3096
}
3097
3098
void do_object_work(oop obj) {
3099
guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
3100
err_msg("Non-oop " PTR_FORMAT ", phase: %s, info: %d",
3101
p2i((void*) obj), phase_str(), _info));
3102
guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj),
3103
err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
3104
p2i((void*) obj), phase_str(), _info));
3105
}
3106
3107
public:
3108
VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3109
3110
void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3111
_phase = phase;
3112
_info = info;
3113
}
3114
3115
virtual void do_oop(oop* p) {
3116
oop obj = oopDesc::load_decode_heap_oop(p);
3117
do_object_work(obj);
3118
}
3119
3120
virtual void do_oop(narrowOop* p) {
3121
// We should not come across narrow oops while scanning marking
3122
// stacks
3123
ShouldNotReachHere();
3124
}
3125
3126
virtual void do_object(oop obj) {
3127
do_object_work(obj);
3128
}
3129
};
3130
3131
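// Debug-only verification, performed at a safepoint: checks that no entry
// on the global mark stack or on the per-task queues points into the
// collection set, that the global finger sits at a region boundary, and
// that no task finger points into a collection set region.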
void ConcurrentMark::verify_no_cset_oops() {
3132
assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3133
if (!G1CollectedHeap::heap()->mark_in_progress()) {
3134
return;
3135
}
3136
3137
VerifyNoCSetOopsClosure cl;
3138
3139
// Verify entries on the global mark stack
3140
cl.set_phase(VerifyNoCSetOopsStack);
3141
_markStack.oops_do(&cl);
3142
3143
// Verify entries on the task queues
3144
for (uint i = 0; i < _max_worker_id; i += 1) {
3145
cl.set_phase(VerifyNoCSetOopsQueues, i);
3146
CMTaskQueue* queue = _task_queues->queue(i);
3147
queue->oops_do(&cl);
3148
}
3149
3150
// Verify the global finger
3151
HeapWord* global_finger = finger();
3152
if (global_finger != NULL && global_finger < _heap_end) {
3153
// The global finger always points to a heap region boundary. We
3154
// use heap_region_containing_raw() to get the containing region
3155
// given that the global finger could be pointing to a free region
3156
// which subsequently becomes continues humongous. If that
3157
// happens, heap_region_containing() will return the bottom of the
3158
// corresponding starts humongous region and the check below will
3159
// not hold any more.
3160
// Since we always iterate over all regions, we might get a NULL HeapRegion
3161
// here.
3162
HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3163
guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3164
err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT,
3165
p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3166
}
3167
3168
// Verify the task fingers
3169
assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3170
for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3171
CMTask* task = _tasks[i];
3172
HeapWord* task_finger = task->finger();
3173
if (task_finger != NULL && task_finger < _heap_end) {
3174
// See above note on the global finger verification.
3175
HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3176
guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3177
!task_hr->in_collection_set(),
3178
err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
3179
p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3180
}
3181
}
3182
}
3183
#endif // PRODUCT
3184
3185
// Aggregate the counting data that was constructed concurrently
3186
// with marking.
3187
class AggregateCountDataHRClosure: public HeapRegionClosure {
3188
G1CollectedHeap* _g1h;
3189
ConcurrentMark* _cm;
3190
CardTableModRefBS* _ct_bs;
3191
BitMap* _cm_card_bm;
3192
uint _max_worker_id;
3193
3194
public:
3195
AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3196
BitMap* cm_card_bm,
3197
uint max_worker_id) :
3198
_g1h(g1h), _cm(g1h->concurrent_mark()),
3199
_ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3200
_cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3201
3202
bool doHeapRegion(HeapRegion* hr) {
3203
if (hr->continuesHumongous()) {
3204
// We will ignore these here and process them when their
3205
// associated "starts humongous" region is processed.
3206
// Note that we cannot rely on their associated
3207
// "starts humongous" region to have their bit set to 1
3208
// since, due to the region chunking in the parallel region
3209
// iteration, a "continues humongous" region might be visited
3210
// before its associated "starts humongous".
3211
return false;
3212
}
3213
3214
HeapWord* start = hr->bottom();
3215
HeapWord* limit = hr->next_top_at_mark_start();
3216
HeapWord* end = hr->end();
3217
3218
assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3219
err_msg("Preconditions not met - "
3220
"start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
3221
"top: " PTR_FORMAT ", end: " PTR_FORMAT,
3222
p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3223
3224
assert(hr->next_marked_bytes() == 0, "Precondition");
3225
3226
if (start == limit) {
3227
// NTAMS of this region has not been set so nothing to do.
3228
return false;
3229
}
3230
3231
// 'start' should be in the heap.
3232
assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3233
// 'end' *may* be just beyond the end of the heap (if hr is the last region)
3234
assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3235
3236
BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3237
BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3238
BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3239
3240
// If ntams is not card aligned then we bump card bitmap index
3241
// for limit so that we get all the cards spanned by
3242
// the object ending at ntams.
3243
// Note: if this is the last region in the heap then ntams
3244
// could actually be just beyond the end of the heap;
3245
// limit_idx will then correspond to a (non-existent) card
3246
// that is also outside the heap.
3247
if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3248
limit_idx += 1;
3249
}
3250
3251
assert(limit_idx <= end_idx, "or else use atomics");
3252
3253
// Aggregate the "stripe" in the count data associated with hr.
3254
uint hrm_index = hr->hrm_index();
3255
size_t marked_bytes = 0;
3256
3257
for (uint i = 0; i < _max_worker_id; i += 1) {
3258
size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3259
BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3260
3261
// Fetch the marked_bytes in this region for task i and
3262
// add it to the running total for this region.
3263
marked_bytes += marked_bytes_array[hrm_index];
3264
3265
// Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3266
// into the global card bitmap.
3267
BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3268
3269
while (scan_idx < limit_idx) {
3270
assert(task_card_bm->at(scan_idx) == true, "should be");
3271
_cm_card_bm->set_bit(scan_idx);
3272
assert(_cm_card_bm->at(scan_idx) == true, "should be");
3273
3274
// BitMap::get_next_one_offset() can handle the case when
3275
// its left_offset parameter is greater than its right_offset
3276
// parameter. It does, however, have an early exit if
3277
// left_offset == right_offset. So let's limit the value
3278
// passed in for left offset here.
3279
BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3280
scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3281
}
3282
}
3283
3284
// Update the marked bytes for this region.
3285
hr->add_to_marked_bytes(marked_bytes);
3286
3287
// Next heap region
3288
return false;
3289
}
3290
};
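
// Illustrative sketch (not part of the original sources): the aggregation
// performed by AggregateCountDataHRClosure can be pictured as a union of the
// per-worker card bitmaps plus a sum of the per-worker marked-bytes counters.
// The names worker_card_bm, worker_marked_bytes and global_card_bm below are
// hypothetical and exist only for this example.
//
//   size_t region_marked_bytes = 0;
//   for (uint w = 0; w < n_workers; w += 1) {
//     // sum the bytes this worker marked in the region...
//     region_marked_bytes += worker_marked_bytes[w][region_index];
//     // ...and union the worker's card bits for [start_idx, limit_idx)
//     // into the global card bitmap.
//     for (size_t c = start_idx; c < limit_idx; c += 1) {
//       if (worker_card_bm[w]->at(c)) {
//         global_card_bm->set_bit(c);
//       }
//     }
//   }
//   // region_marked_bytes is what ends up in hr->add_to_marked_bytes().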
3291
3292
class G1AggregateCountDataTask: public AbstractGangTask {
3293
protected:
3294
G1CollectedHeap* _g1h;
3295
ConcurrentMark* _cm;
3296
BitMap* _cm_card_bm;
3297
uint _max_worker_id;
3298
int _active_workers;
3299
3300
public:
3301
G1AggregateCountDataTask(G1CollectedHeap* g1h,
3302
ConcurrentMark* cm,
3303
BitMap* cm_card_bm,
3304
uint max_worker_id,
3305
int n_workers) :
3306
AbstractGangTask("Count Aggregation"),
3307
_g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3308
_max_worker_id(max_worker_id),
3309
_active_workers(n_workers) { }
3310
3311
void work(uint worker_id) {
3312
AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3313
3314
if (G1CollectedHeap::use_parallel_gc_threads()) {
3315
_g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3316
_active_workers,
3317
HeapRegion::AggregateCountClaimValue);
3318
} else {
3319
_g1h->heap_region_iterate(&cl);
3320
}
3321
}
3322
};
3323
3324
3325
void ConcurrentMark::aggregate_count_data() {
3326
int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3327
_g1h->workers()->active_workers() :
3328
1);
3329
3330
G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3331
_max_worker_id, n_workers);
3332
3333
if (G1CollectedHeap::use_parallel_gc_threads()) {
3334
assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3335
"sanity check");
3336
_g1h->set_par_threads(n_workers);
3337
_g1h->workers()->run_task(&g1_par_agg_task);
3338
_g1h->set_par_threads(0);
3339
3340
assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3341
"sanity check");
3342
_g1h->reset_heap_region_claim_values();
3343
} else {
3344
g1_par_agg_task.work(0);
3345
}
3346
}
3347
3348
// Clear the per-worker arrays used to store the per-region counting data
3349
void ConcurrentMark::clear_all_count_data() {
3350
// Clear the global card bitmap - it will be filled during
3351
// liveness count aggregation (during remark) and the
3352
// final counting task.
3353
_card_bm.clear();
3354
3355
// Clear the global region bitmap - it will be filled as part
3356
// of the final counting task.
3357
_region_bm.clear();
3358
3359
uint max_regions = _g1h->max_regions();
3360
assert(_max_worker_id > 0, "uninitialized");
3361
3362
for (uint i = 0; i < _max_worker_id; i += 1) {
3363
BitMap* task_card_bm = count_card_bitmap_for(i);
3364
size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3365
3366
assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3367
assert(marked_bytes_array != NULL, "uninitialized");
3368
3369
memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3370
task_card_bm->clear();
3371
}
3372
}
3373
3374
void ConcurrentMark::print_stats() {
3375
if (verbose_stats()) {
3376
gclog_or_tty->print_cr("---------------------------------------------------------------------");
3377
for (size_t i = 0; i < _active_tasks; ++i) {
3378
_tasks[i]->print_stats();
3379
gclog_or_tty->print_cr("---------------------------------------------------------------------");
3380
}
3381
}
3382
}
3383
3384
// abandon current marking iteration due to a Full GC
3385
void ConcurrentMark::abort() {
3386
// Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3387
// concurrent bitmap clearing.
3388
_nextMarkBitMap->clearAll();
3389
3390
// Note we cannot clear the previous marking bitmap here
3391
// since VerifyDuringGC verifies the objects marked during
3392
// a full GC against the previous bitmap.
3393
3394
// Clear the liveness counting data
3395
clear_all_count_data();
3396
// Empty mark stack
3397
reset_marking_state();
3398
for (uint i = 0; i < _max_worker_id; ++i) {
3399
_tasks[i]->clear_region_fields();
3400
}
3401
_first_overflow_barrier_sync.abort();
3402
_second_overflow_barrier_sync.abort();
3403
const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3404
if (!gc_id.is_undefined()) {
3405
// We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3406
// to detect that it was aborted. Only keep track of the first GC id that we aborted.
3407
_aborted_gc_id = gc_id;
3408
}
3409
_has_aborted = true;
3410
3411
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3412
satb_mq_set.abandon_partial_marking();
3413
// This can be called either during or outside marking, we'll read
3414
// the expected_active value from the SATB queue set.
3415
satb_mq_set.set_active_all_threads(
3416
false, /* new active value */
3417
satb_mq_set.is_active() /* expected_active */);
3418
3419
_g1h->trace_heap_after_concurrent_cycle();
3420
_g1h->register_concurrent_cycle_end();
3421
}
3422
3423
const GCId& ConcurrentMark::concurrent_gc_id() {
3424
if (has_aborted()) {
3425
return _aborted_gc_id;
3426
}
3427
return _g1h->gc_tracer_cm()->gc_id();
3428
}
3429
3430
static void print_ms_time_info(const char* prefix, const char* name,
3431
NumberSeq& ns) {
3432
gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3433
prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3434
if (ns.num() > 0) {
3435
gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3436
prefix, ns.sd(), ns.maximum());
3437
}
3438
}
3439
3440
void ConcurrentMark::print_summary_info() {
3441
gclog_or_tty->print_cr(" Concurrent marking:");
3442
print_ms_time_info(" ", "init marks", _init_times);
3443
print_ms_time_info(" ", "remarks", _remark_times);
3444
{
3445
print_ms_time_info(" ", "final marks", _remark_mark_times);
3446
print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3447
3448
}
3449
print_ms_time_info(" ", "cleanups", _cleanup_times);
3450
gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3451
_total_counting_time,
3452
(_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3453
(double)_cleanup_times.num()
3454
: 0.0));
3455
if (G1ScrubRemSets) {
3456
gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3457
_total_rs_scrub_time,
3458
(_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3459
(double)_cleanup_times.num()
3460
: 0.0));
3461
}
3462
gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3463
(_init_times.sum() + _remark_times.sum() +
3464
_cleanup_times.sum())/1000.0);
3465
gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3466
"(%8.2f s marking).",
3467
cmThread()->vtime_accum(),
3468
cmThread()->vtime_mark_accum());
3469
}
3470
3471
void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3472
if (use_parallel_marking_threads()) {
3473
_parallel_workers->print_worker_threads_on(st);
3474
}
3475
}
3476
3477
void ConcurrentMark::print_on_error(outputStream* st) const {
3478
st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3479
p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3480
_prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3481
_nextMarkBitMap->print_on_error(st, " Next Bits: ");
3482
}
3483
3484
// We take a break if someone is trying to stop the world.
3485
bool ConcurrentMark::do_yield_check(uint worker_id) {
3486
if (SuspendibleThreadSet::should_yield()) {
3487
if (worker_id == 0) {
3488
_g1h->g1_policy()->record_concurrent_pause();
3489
}
3490
SuspendibleThreadSet::yield();
3491
return true;
3492
} else {
3493
return false;
3494
}
3495
}
3496
3497
#ifndef PRODUCT
3498
// for debugging purposes
3499
void ConcurrentMark::print_finger() {
3500
gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
3501
p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3502
for (uint i = 0; i < _max_worker_id; ++i) {
3503
gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3504
}
3505
gclog_or_tty->cr();
3506
}
3507
#endif
3508
3509
template<bool scan>
3510
inline void CMTask::process_grey_object(oop obj) {
3511
assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3512
3513
if (_cm->verbose_high()) {
3514
gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3515
_worker_id, p2i((void*) obj));
3516
}
3517
3518
assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj),
3519
"Any stolen object should be a slice or marked");
3520
3521
if (scan) {
3522
if (G1CMObjArrayProcessor::is_array_slice(obj)) {
3523
_words_scanned += _objArray_processor.process_slice(obj);
3524
} else if (G1CMObjArrayProcessor::should_be_sliced(obj)) {
3525
_words_scanned += _objArray_processor.process_obj(obj);
3526
} else {
3527
size_t obj_size = obj->size();
3528
_words_scanned += obj_size;
3529
obj->oop_iterate(_cm_oop_closure);
3530
}
3531
}
3532
statsOnly( ++_objs_scanned );
3533
check_limits();
3534
}
3535
3536
template void CMTask::process_grey_object<true>(oop);
3537
template void CMTask::process_grey_object<false>(oop);
3538
3539
// Closure for iteration over bitmaps
3540
class CMBitMapClosure : public BitMapClosure {
3541
private:
3542
// the bitmap that is being iterated over
3543
CMBitMap* _nextMarkBitMap;
3544
ConcurrentMark* _cm;
3545
CMTask* _task;
3546
3547
public:
3548
CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3549
_task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3550
3551
bool do_bit(size_t offset) {
3552
HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3553
assert(_nextMarkBitMap->isMarked(addr), "invariant");
3554
assert( addr < _cm->finger(), "invariant");
3555
3556
statsOnly( _task->increase_objs_found_on_bitmap() );
3557
assert(addr >= _task->finger(), "invariant");
3558
3559
// We move that task's local finger along.
3560
_task->move_finger_to(addr);
3561
3562
_task->scan_object(oop(addr));
3563
// we only partially drain the local queue and global stack
3564
_task->drain_local_queue(true);
3565
_task->drain_global_stack(true);
3566
3567
// if the has_aborted flag has been raised, we need to bail out of
3568
// the iteration
3569
return !_task->has_aborted();
3570
}
3571
};
3572
3573
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3574
ConcurrentMark* cm,
3575
CMTask* task)
3576
: _g1h(g1h), _cm(cm), _task(task) {
3577
assert(_ref_processor == NULL, "should be initialized to NULL");
3578
3579
if (G1UseConcMarkReferenceProcessing) {
3580
_ref_processor = g1h->ref_processor_cm();
3581
assert(_ref_processor != NULL, "should not be NULL");
3582
}
3583
}
3584
3585
void CMTask::setup_for_region(HeapRegion* hr) {
3586
assert(hr != NULL,
3587
"claim_region() should have filtered out NULL regions");
3588
assert(!hr->continuesHumongous(),
3589
"claim_region() should have filtered out continues humongous regions");
3590
3591
if (_cm->verbose_low()) {
3592
gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3593
_worker_id, p2i(hr));
3594
}
3595
3596
_curr_region = hr;
3597
_finger = hr->bottom();
3598
update_region_limit();
3599
}
3600
3601
void CMTask::update_region_limit() {
3602
HeapRegion* hr = _curr_region;
3603
HeapWord* bottom = hr->bottom();
3604
HeapWord* limit = hr->next_top_at_mark_start();
3605
3606
if (limit == bottom) {
3607
if (_cm->verbose_low()) {
3608
gclog_or_tty->print_cr("[%u] found an empty region "
3609
"[" PTR_FORMAT ", " PTR_FORMAT ")",
3610
_worker_id, p2i(bottom), p2i(limit));
3611
}
3612
// The region was collected underneath our feet.
3613
// We set the finger to bottom to ensure that the bitmap
3614
// iteration that will follow this will not do anything.
3615
// (this is not a condition that holds when we set the region up,
3616
// as the region is not supposed to be empty in the first place)
3617
_finger = bottom;
3618
} else if (limit >= _region_limit) {
3619
assert(limit >= _finger, "peace of mind");
3620
} else {
3621
assert(limit < _region_limit, "only way to get here");
3622
// This can happen under some pretty unusual circumstances. An
3623
// evacuation pause empties the region underneath our feet (NTAMS
3624
// at bottom). We then do some allocation in the region (NTAMS
3625
// stays at bottom), followed by the region being used as a GC
3626
// alloc region (NTAMS will move to top() and the objects
3627
// originally below it will be grayed). All objects now marked in
3628
// the region are explicitly grayed, if below the global finger,
3629
// and we in fact do not need to scan anything else. So, we simply
3630
// set _finger to be limit to ensure that the bitmap iteration
3631
// doesn't do anything.
3632
_finger = limit;
3633
}
3634
3635
_region_limit = limit;
3636
}
3637
3638
void CMTask::giveup_current_region() {
3639
assert(_curr_region != NULL, "invariant");
3640
if (_cm->verbose_low()) {
3641
gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3642
_worker_id, p2i(_curr_region));
3643
}
3644
clear_region_fields();
3645
}
3646
3647
void CMTask::clear_region_fields() {
3648
// Values for these three fields that indicate that we're not
3649
// holding on to a region.
3650
_curr_region = NULL;
3651
_finger = NULL;
3652
_region_limit = NULL;
3653
}
3654
3655
void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3656
if (cm_oop_closure == NULL) {
3657
assert(_cm_oop_closure != NULL, "invariant");
3658
} else {
3659
assert(_cm_oop_closure == NULL, "invariant");
3660
}
3661
_cm_oop_closure = cm_oop_closure;
3662
}
3663
3664
void CMTask::reset(CMBitMap* nextMarkBitMap) {
3665
guarantee(nextMarkBitMap != NULL, "invariant");
3666
3667
if (_cm->verbose_low()) {
3668
gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3669
}
3670
3671
_nextMarkBitMap = nextMarkBitMap;
3672
clear_region_fields();
3673
3674
_calls = 0;
3675
_elapsed_time_ms = 0.0;
3676
_termination_time_ms = 0.0;
3677
_termination_start_time_ms = 0.0;
3678
3679
#if _MARKING_STATS_
3680
_local_pushes = 0;
3681
_local_pops = 0;
3682
_local_max_size = 0;
3683
_objs_scanned = 0;
3684
_global_pushes = 0;
3685
_global_pops = 0;
3686
_global_max_size = 0;
3687
_global_transfers_to = 0;
3688
_global_transfers_from = 0;
3689
_regions_claimed = 0;
3690
_objs_found_on_bitmap = 0;
3691
_satb_buffers_processed = 0;
3692
_steal_attempts = 0;
3693
_steals = 0;
3694
_aborted = 0;
3695
_aborted_overflow = 0;
3696
_aborted_cm_aborted = 0;
3697
_aborted_yield = 0;
3698
_aborted_timed_out = 0;
3699
_aborted_satb = 0;
3700
_aborted_termination = 0;
3701
#endif // _MARKING_STATS_
3702
}
3703
3704
bool CMTask::should_exit_termination() {
3705
regular_clock_call();
3706
// This is called when we are in the termination protocol. We should
3707
// quit if, for some reason, this task wants to abort or the global
3708
// stack is not empty (this means that we can get work from it).
3709
return !_cm->mark_stack_empty() || has_aborted();
3710
}
3711
3712
void CMTask::reached_limit() {
3713
assert(_words_scanned >= _words_scanned_limit ||
3714
_refs_reached >= _refs_reached_limit ,
3715
"shouldn't have been called otherwise");
3716
regular_clock_call();
3717
}
3718
3719
void CMTask::regular_clock_call() {
3720
if (has_aborted()) return;
3721
3722
// First, we need to recalculate the words scanned and refs reached
3723
// limits for the next clock call.
3724
recalculate_limits();
3725
3726
// During the regular clock call we do the following
3727
3728
// (1) If an overflow has been flagged, then we abort.
3729
if (_cm->has_overflown()) {
3730
set_has_aborted();
3731
return;
3732
}
3733
3734
// If we are not concurrent (i.e. we're doing remark) we don't need
3735
// to check anything else. The other steps are only needed during
3736
// the concurrent marking phase.
3737
if (!concurrent()) return;
3738
3739
// (2) If marking has been aborted for Full GC, then we also abort.
3740
if (_cm->has_aborted()) {
3741
set_has_aborted();
3742
statsOnly( ++_aborted_cm_aborted );
3743
return;
3744
}
3745
3746
double curr_time_ms = os::elapsedVTime() * 1000.0;
3747
3748
// (3) If marking stats are enabled, then we update the step history.
3749
#if _MARKING_STATS_
3750
if (_words_scanned >= _words_scanned_limit) {
3751
++_clock_due_to_scanning;
3752
}
3753
if (_refs_reached >= _refs_reached_limit) {
3754
++_clock_due_to_marking;
3755
}
3756
3757
double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3758
_interval_start_time_ms = curr_time_ms;
3759
_all_clock_intervals_ms.add(last_interval_ms);
3760
3761
if (_cm->verbose_medium()) {
3762
gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3763
"scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3764
_worker_id, last_interval_ms,
3765
_words_scanned,
3766
(_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3767
_refs_reached,
3768
(_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3769
}
3770
#endif // _MARKING_STATS_
3771
3772
// (4) We check whether we should yield. If we have to, then we abort.
3773
if (SuspendibleThreadSet::should_yield()) {
3774
// We should yield. To do this we abort the task. The caller is
3775
// responsible for yielding.
3776
set_has_aborted();
3777
statsOnly( ++_aborted_yield );
3778
return;
3779
}
3780
3781
// (5) We check whether we've reached our time quota. If we have,
3782
// then we abort.
3783
double elapsed_time_ms = curr_time_ms - _start_time_ms;
3784
if (elapsed_time_ms > _time_target_ms) {
3785
set_has_aborted();
3786
_has_timed_out = true;
3787
statsOnly( ++_aborted_timed_out );
3788
return;
3789
}
3790
3791
// (6) Finally, we check whether there are enough completed SATB
3792
// buffers available for processing. If there are, we abort.
3793
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3794
if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3795
if (_cm->verbose_low()) {
3796
gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3797
_worker_id);
3798
}
3799
// we do need to process SATB buffers, so we'll abort and restart
3800
// the marking task to do so
3801
set_has_aborted();
3802
statsOnly( ++_aborted_satb );
3803
return;
3804
}
3805
}
3806
3807
void CMTask::recalculate_limits() {
3808
_real_words_scanned_limit = _words_scanned + words_scanned_period;
3809
_words_scanned_limit = _real_words_scanned_limit;
3810
3811
_real_refs_reached_limit = _refs_reached + refs_reached_period;
3812
_refs_reached_limit = _real_refs_reached_limit;
3813
}
3814
3815
void CMTask::decrease_limits() {
3816
// This is called when we believe that we're going to do an infrequent
3817
// operation which will increase the per byte scanned cost (i.e. move
3818
// entries to/from the global stack). It basically tries to decrease the
3819
// scanning limit so that the clock is called earlier.
3820
3821
if (_cm->verbose_medium()) {
3822
gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3823
}
3824
3825
_words_scanned_limit = _real_words_scanned_limit -
3826
3 * words_scanned_period / 4;
3827
_refs_reached_limit = _real_refs_reached_limit -
3828
3 * refs_reached_period / 4;
3829
}
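
// A worked example of the limit bookkeeping above (the period value is
// illustrative only, not necessarily the actual words_scanned_period):
// after recalculate_limits() with _words_scanned == 5000 and a hypothetical
// words_scanned_period of 12288 we get
//   _real_words_scanned_limit = 5000 + 12288 = 17288
//   _words_scanned_limit      = 17288
// and a later decrease_limits() pulls the soft limit forward by 3/4 of a
// period so that regular_clock_call() fires sooner:
//   _words_scanned_limit = 17288 - (3 * 12288 / 4) = 17288 - 9216 = 8072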
3830
3831
void CMTask::move_entries_to_global_stack() {
3832
// local array where we'll store the entries that will be popped
3833
// from the local queue
3834
oop buffer[global_stack_transfer_size];
3835
3836
int n = 0;
3837
oop obj;
3838
while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3839
buffer[n] = obj;
3840
++n;
3841
}
3842
3843
if (n > 0) {
3844
// we popped at least one entry from the local queue
3845
3846
statsOnly( ++_global_transfers_to; _local_pops += n );
3847
3848
if (!_cm->mark_stack_push(buffer, n)) {
3849
if (_cm->verbose_low()) {
3850
gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3851
_worker_id);
3852
}
3853
set_has_aborted();
3854
} else {
3855
// the transfer was successful
3856
3857
if (_cm->verbose_medium()) {
3858
gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3859
_worker_id, n);
3860
}
3861
statsOnly( int tmp_size = _cm->mark_stack_size();
3862
if (tmp_size > _global_max_size) {
3863
_global_max_size = tmp_size;
3864
}
3865
_global_pushes += n );
3866
}
3867
}
3868
3869
// this operation was quite expensive, so decrease the limits
3870
decrease_limits();
3871
}
3872
3873
void CMTask::get_entries_from_global_stack() {
3874
// local array where we'll store the entries that will be popped
3875
// from the global stack.
3876
oop buffer[global_stack_transfer_size];
3877
int n;
3878
_cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3879
assert(n <= global_stack_transfer_size,
3880
"we should not pop more than the given limit");
3881
if (n > 0) {
3882
// yes, we did actually pop at least one entry
3883
3884
statsOnly( ++_global_transfers_from; _global_pops += n );
3885
if (_cm->verbose_medium()) {
3886
gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3887
_worker_id, n);
3888
}
3889
for (int i = 0; i < n; ++i) {
3890
assert(G1CMObjArrayProcessor::is_array_slice(buffer[i]) || buffer[i]->is_oop(),
3891
err_msg("Element " PTR_FORMAT " must be an array slice or oop", p2i(buffer[i])));
3892
bool success = _task_queue->push(buffer[i]);
3893
// We only call this when the local queue is empty or under a
3894
// given target limit. So, we do not expect this push to fail.
3895
assert(success, "invariant");
3896
}
3897
3898
statsOnly( int tmp_size = _task_queue->size();
3899
if (tmp_size > _local_max_size) {
3900
_local_max_size = tmp_size;
3901
}
3902
_local_pushes += n );
3903
}
3904
3905
// this operation was quite expensive, so decrease the limits
3906
decrease_limits();
3907
}
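
// Hedged sketch (not part of the original sources) of the chunked transfer
// between the local queue and the global mark stack implemented by the two
// methods above. local_queue_pop() and global_stack_push() are hypothetical
// stand-ins for _task_queue->pop_local() and _cm->mark_stack_push().
//
//   oop chunk[global_stack_transfer_size];
//   int k = 0;
//   while (k < global_stack_transfer_size && local_queue_pop(chunk[k])) {
//     k += 1;
//   }
//   if (k > 0 && !global_stack_push(chunk, k)) {
//     // The global stack overflowed: the task aborts and the whole marking
//     // phase restarts once all tasks have synced up.
//   }
//   // The reverse direction pops up to global_stack_transfer_size entries
//   // from the global stack and pushes them onto the local queue.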
3908
3909
void CMTask::drain_local_queue(bool partially) {
3910
if (has_aborted()) {
3911
return;
3912
}
3913
3914
// Decide what the target size is, depending whether we're going to
3915
// drain it partially (so that other tasks can steal if they run out
3916
// of things to do) or totally (at the very end).
3917
size_t target_size;
3918
if (partially) {
3919
target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3920
} else {
3921
target_size = 0;
3922
}
3923
3924
if (_task_queue->size() > target_size) {
3925
if (_cm->verbose_high()) {
3926
gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3927
_worker_id, target_size);
3928
}
3929
3930
oop obj;
3931
bool ret = _task_queue->pop_local(obj);
3932
while (ret) {
3933
statsOnly( ++_local_pops );
3934
3935
if (_cm->verbose_high()) {
3936
gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3937
p2i((void*) obj));
3938
}
3939
3940
scan_object(obj);
3941
3942
if (_task_queue->size() <= target_size || has_aborted()) {
3943
ret = false;
3944
} else {
3945
ret = _task_queue->pop_local(obj);
3946
}
3947
}
3948
3949
if (_cm->verbose_high()) {
3950
gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3951
_worker_id, _task_queue->size());
3952
}
3953
}
3954
}
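
// Illustrative target-size computation for a partial drain (the numbers are
// hypothetical, not the actual queue capacity or GCDrainStackTargetSize):
// with max_elems() == 16384 and GCDrainStackTargetSize == 64,
//   target_size = MIN2(16384 / 3, 64) = MIN2(5461, 64) = 64
// so the task keeps popping and scanning entries until at most 64 remain,
// leaving some work on the queue for other tasks to steal.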
3955
3956
void CMTask::drain_global_stack(bool partially) {
3957
if (has_aborted()) return;
3958
3959
// We have a policy to drain the local queue before we attempt to
3960
// drain the global stack.
3961
assert(partially || _task_queue->size() == 0, "invariant");
3962
3963
// Decide what the target size is, depending whether we're going to
3964
// drain it partially (so that other tasks can steal if they run out
3965
// of things to do) or totally (at the very end). Notice that,
3966
// because we move entries from the global stack in chunks or
3967
// because another task might be doing the same, we might in fact
3968
// drop below the target. But, this is not a problem.
3969
size_t target_size;
3970
if (partially) {
3971
target_size = _cm->partial_mark_stack_size_target();
3972
} else {
3973
target_size = 0;
3974
}
3975
3976
if (_cm->mark_stack_size() > target_size) {
3977
if (_cm->verbose_low()) {
3978
gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3979
_worker_id, target_size);
3980
}
3981
3982
while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3983
get_entries_from_global_stack();
3984
drain_local_queue(partially);
3985
}
3986
3987
if (_cm->verbose_low()) {
3988
gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3989
_worker_id, _cm->mark_stack_size());
3990
}
3991
}
3992
}
3993
3994
// The SATB queue makes several assumptions about whether to call the par or
3995
// non-par versions of the methods. This is why some of the code is
3996
// replicated. We should really get rid of the single-threaded version
3997
// of the code to simplify things.
3998
void CMTask::drain_satb_buffers() {
3999
if (has_aborted()) return;
4000
4001
// We set this so that the regular clock knows that we're in the
4002
// middle of draining buffers and doesn't set the abort flag when it
4003
// notices that SATB buffers are available for draining. It'd be
4004
// very counterproductive if it did that. :-)
4005
_draining_satb_buffers = true;
4006
4007
CMSATBBufferClosure satb_cl(this, _g1h);
4008
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4009
4010
// This keeps claiming and applying the closure to completed buffers
4011
// until we run out of buffers or we need to abort.
4012
while (!has_aborted() &&
4013
satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
4014
if (_cm->verbose_medium()) {
4015
gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4016
}
4017
statsOnly( ++_satb_buffers_processed );
4018
regular_clock_call();
4019
}
4020
4021
_draining_satb_buffers = false;
4022
4023
assert(has_aborted() ||
4024
concurrent() ||
4025
satb_mq_set.completed_buffers_num() == 0, "invariant");
4026
4027
// again, this was a potentially expensive operation, decrease the
4028
// limits to get the regular clock call early
4029
decrease_limits();
4030
}
4031
4032
void CMTask::print_stats() {
4033
gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4034
_worker_id, _calls);
4035
gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4036
_elapsed_time_ms, _termination_time_ms);
4037
gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4038
_step_times_ms.num(), _step_times_ms.avg(),
4039
_step_times_ms.sd());
4040
gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4041
_step_times_ms.maximum(), _step_times_ms.sum());
4042
4043
#if _MARKING_STATS_
4044
gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4045
_all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4046
_all_clock_intervals_ms.sd());
4047
gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4048
_all_clock_intervals_ms.maximum(),
4049
_all_clock_intervals_ms.sum());
4050
gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4051
_clock_due_to_scanning, _clock_due_to_marking);
4052
gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4053
_objs_scanned, _objs_found_on_bitmap);
4054
gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4055
_local_pushes, _local_pops, _local_max_size);
4056
gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4057
_global_pushes, _global_pops, _global_max_size);
4058
gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4059
_global_transfers_to,_global_transfers_from);
4060
gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4061
gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4062
gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4063
_steal_attempts, _steals);
4064
gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4065
gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4066
_aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4067
gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4068
_aborted_timed_out, _aborted_satb, _aborted_termination);
4069
#endif // _MARKING_STATS_
4070
}
4071
4072
/*****************************************************************************
4073
4074
The do_marking_step(time_target_ms, ...) method is the building
4075
block of the parallel marking framework. It can be called in parallel
4076
with other invocations of do_marking_step() on different tasks
4077
(but only one per task, obviously) and concurrently with the
4078
mutator threads, or during remark, hence it eliminates the need
4079
for two versions of the code. When called during remark, it will
4080
pick up from where the task left off during the concurrent marking
4081
phase. Interestingly, tasks are also claimable during evacuation
4082
pauses, since do_marking_step() ensures that it aborts before
4083
it needs to yield.
4084
4085
The data structures that it uses to do marking work are the
4086
following:
4087
4088
(1) Marking Bitmap. If there are gray objects that appear only
4089
on the bitmap (this happens either when dealing with an overflow
4090
or when the initial marking phase has simply marked the roots
4091
and didn't push them on the stack), then tasks claim heap
4092
regions whose bitmap they then scan to find gray objects. A
4093
global finger indicates where the end of the last claimed region
4094
is. A local finger indicates how far into the region a task has
4095
scanned. The two fingers are used to determine how to gray an
4096
object (i.e. whether simply marking it is OK, as it will be
4097
visited by a task in the future, or whether it needs to be also
4098
pushed on a stack).
4099
4100
(2) Local Queue. The local queue of the task which is accessed
4101
reasonably efficiently by the task. Other tasks can steal from
4102
it when they run out of work. Throughout the marking phase, a
4103
task attempts to keep its local queue short but not totally
4104
empty, so that entries are available for stealing by other
4105
tasks. Only when there is no more work will a task totally
4106
drain its local queue.
4107
4108
(3) Global Mark Stack. This handles local queue overflow. During
4109
marking only sets of entries are moved between it and the local
4110
queues, as access to it requires a mutex and more fine-grained
4111
interaction with it which might cause contention. If it
4112
overflows, then the marking phase should restart and iterate
4113
over the bitmap to identify gray objects. Throughout the marking
4114
phase, tasks attempt to keep the global mark stack at a small
4115
length but not totally empty, so that entries are available for
4116
popping by other tasks. Only when there is no more work will
4117
tasks totally drain the global mark stack.
4118
4119
(4) SATB Buffer Queue. This is where completed SATB buffers are
4120
made available. Buffers are regularly removed from this queue
4121
and scanned for roots, so that the queue doesn't get too
4122
long. During remark, all completed buffers are processed, as
4123
well as the filled-in parts of any uncompleted buffers.
4124
4125
The do_marking_step() method tries to abort when the time target
4126
has been reached. There are a few other cases when the
4127
do_marking_step() method also aborts:
4128
4129
(1) When the marking phase has been aborted (after a Full GC).
4130
4131
(2) When a global overflow (on the global stack) has been
4132
triggered. Before the task aborts, it will actually sync up with
4133
the other tasks to ensure that all the marking data structures
4134
(local queues, stacks, fingers etc.) are re-initialized so that
4135
when do_marking_step() completes, the marking phase can
4136
immediately restart.
4137
4138
(3) When enough completed SATB buffers are available. The
4139
do_marking_step() method only tries to drain SATB buffers right
4140
at the beginning. So, if enough buffers are available, the
4141
marking step aborts and the SATB buffers are processed at
4142
the beginning of the next invocation.
4143
4144
(4) To yield. When we have to yield, we abort and yield
4145
right at the end of do_marking_step(). This saves us from a lot
4146
of hassle as, by yielding we might allow a Full GC. If this
4147
happens then objects will be compacted underneath our feet, the
4148
heap might shrink, etc. We save checking for this by just
4149
aborting and doing the yield right at the end.
4150
4151
From the above it follows that the do_marking_step() method should
4152
be called in a loop (or, otherwise, regularly) until it completes.
4153
4154
If a marking step completes without its has_aborted() flag being
4155
true, it means it has completed the current marking phase (and
4156
also all other marking tasks have done so and have all synced up).
4157
4158
A method called regular_clock_call() is invoked "regularly" (in
4159
sub ms intervals) throughout marking. It is this clock method that
4160
checks all the abort conditions which were mentioned above and
4161
decides when the task should abort. A work-based scheme is used to
4162
trigger this clock method: when the number of object words the
4163
marking phase has scanned or the number of references the marking
4164
phase has visited reach a given limit. Additional invocations to
4165
the clock method have been planted in a few other strategic places
4166
too. The initial reason for the clock method was to avoid calling
4167
vtime too regularly, as it is quite expensive. So, once it was in
4168
place, it was natural to piggy-back all the other conditions on it
4169
too and not constantly check them throughout the code.
4170
4171
If do_termination is true then do_marking_step will enter its
4172
termination protocol.
4173
4174
The value of is_serial must be true when do_marking_step is being
4175
called serially (i.e. by the VMThread) and do_marking_step should
4176
skip any synchronization in the termination and overflow code.
4177
Examples include the serial remark code and the serial reference
4178
processing closures.
4179
4180
The value of is_serial must be false when do_marking_step is
4181
being called by any of the worker threads in a work gang.
4182
Examples include the concurrent marking code (CMMarkingTask),
4183
the MT remark code, and the MT reference processing closures.
4184
4185
*****************************************************************************/
4186
4187
void CMTask::do_marking_step(double time_target_ms,
4188
bool do_termination,
4189
bool is_serial) {
4190
assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4191
assert(concurrent() == _cm->concurrent(), "they should be the same");
4192
4193
G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4194
assert(_task_queues != NULL, "invariant");
4195
assert(_task_queue != NULL, "invariant");
4196
assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4197
4198
assert(!_claimed,
4199
"only one thread should claim this task at any one time");
4200
4201
// OK, this doesn't safeguard against all possible scenarios, as it is
4202
// possible for two threads to set the _claimed flag at the same
4203
// time. But it is only for debugging purposes anyway and it will
4204
// catch most problems.
4205
_claimed = true;
4206
4207
_start_time_ms = os::elapsedVTime() * 1000.0;
4208
statsOnly( _interval_start_time_ms = _start_time_ms );
4209
4210
// If do_stealing is true then do_marking_step will attempt to
4211
// steal work from the other CMTasks. It only makes sense to
4212
// enable stealing when the termination protocol is enabled
4213
// and do_marking_step() is not being called serially.
4214
bool do_stealing = do_termination && !is_serial;
4215
4216
double diff_prediction_ms =
4217
g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4218
_time_target_ms = time_target_ms - diff_prediction_ms;
4219
4220
// set up the variables that are used in the work-based scheme to
4221
// call the regular clock method
4222
_words_scanned = 0;
4223
_refs_reached = 0;
4224
recalculate_limits();
4225
4226
// clear all flags
4227
clear_has_aborted();
4228
_has_timed_out = false;
4229
_draining_satb_buffers = false;
4230
4231
++_calls;
4232
4233
if (_cm->verbose_low()) {
4234
gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4235
"target = %1.2lfms >>>>>>>>>>",
4236
_worker_id, _calls, _time_target_ms);
4237
}
4238
4239
// Set up the bitmap and oop closures. Anything that uses them is
4240
// eventually called from this method, so it is OK to allocate these
4241
// statically.
4242
CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4243
G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4244
set_cm_oop_closure(&cm_oop_closure);
4245
4246
if (_cm->has_overflown()) {
4247
// This can happen if the mark stack overflows during a GC pause
4248
// and this task, after a yield point, restarts. We have to abort
4249
// as we need to get into the overflow protocol which happens
4250
// right at the end of this task.
4251
set_has_aborted();
4252
}
4253
4254
// First drain any available SATB buffers. After this, we will not
4255
// look at SATB buffers before the next invocation of this method.
4256
// If enough completed SATB buffers are queued up, the regular clock
4257
// will abort this task so that it restarts.
4258
drain_satb_buffers();
4259
// ...then partially drain the local queue and the global stack
4260
drain_local_queue(true);
4261
drain_global_stack(true);
4262
4263
do {
4264
if (!has_aborted() && _curr_region != NULL) {
4265
// This means that we're already holding on to a region.
4266
assert(_finger != NULL, "if region is not NULL, then the finger "
4267
"should not be NULL either");
4268
4269
// We might have restarted this task after an evacuation pause
4270
// which might have evacuated the region we're holding on to
4271
// underneath our feet. Let's read its limit again to make sure
4272
// that we do not iterate over a region of the heap that
4273
// contains garbage (update_region_limit() will also move
4274
// _finger to the start of the region if it is found empty).
4275
update_region_limit();
4276
// We will start from _finger not from the start of the region,
4277
// as we might be restarting this task after aborting half-way
4278
// through scanning this region. In this case, _finger points to
4279
// the address where we last found a marked object. If this is a
4280
// fresh region, _finger points to start().
4281
MemRegion mr = MemRegion(_finger, _region_limit);
4282
4283
if (_cm->verbose_low()) {
4284
gclog_or_tty->print_cr("[%u] we're scanning part "
4285
"[" PTR_FORMAT ", " PTR_FORMAT ") "
4286
"of region " HR_FORMAT,
4287
_worker_id, p2i(_finger), p2i(_region_limit),
4288
HR_FORMAT_PARAMS(_curr_region));
4289
}
4290
4291
assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4292
"humongous regions should go around loop once only");
4293
4294
// Some special cases:
4295
// If the memory region is empty, we can just give up the region.
4296
// If the current region is humongous then we only need to check
4297
// the bitmap for the bit associated with the start of the object,
4298
// scan the object if it's live, and give up the region.
4299
// Otherwise, let's iterate over the bitmap of the part of the region
4300
// that is left.
4301
// If the iteration is successful, give up the region.
4302
if (mr.is_empty()) {
4303
giveup_current_region();
4304
regular_clock_call();
4305
} else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4306
if (_nextMarkBitMap->isMarked(mr.start())) {
4307
// The object is marked - apply the closure
4308
BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4309
bitmap_closure.do_bit(offset);
4310
}
4311
// Even if this task aborted while scanning the humongous object
4312
// we can (and should) give up the current region.
4313
giveup_current_region();
4314
regular_clock_call();
4315
} else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4316
giveup_current_region();
4317
regular_clock_call();
4318
} else {
4319
assert(has_aborted(), "currently the only way to do so");
4320
// The only way to abort the bitmap iteration is to return
4321
// false from the do_bit() method. However, inside the
4322
// do_bit() method we move the _finger to point to the
4323
// object currently being looked at. So, if we bail out, we
4324
// have definitely set _finger to something non-null.
4325
assert(_finger != NULL, "invariant");
4326
4327
// Region iteration was actually aborted. So now _finger
4328
// points to the address of the object we last scanned. If we
4329
// leave it there, when we restart this task, we will rescan
4330
// the object. It is easy to avoid this. We move the finger by
4331
// enough to point to the next possible object header (the
4332
// bitmap knows by how much we need to move it as it knows its
4333
// granularity).
4334
assert(_finger < _region_limit, "invariant");
4335
HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4336
// Check if bitmap iteration was aborted while scanning the last object
4337
if (new_finger >= _region_limit) {
4338
giveup_current_region();
4339
} else {
4340
move_finger_to(new_finger);
4341
}
4342
}
4343
}
4344
// At this point we have either completed iterating over the
4345
// region we were holding on to, or we have aborted.
4346
4347
// We then partially drain the local queue and the global stack.
4348
// (Do we really need this?)
4349
drain_local_queue(true);
4350
drain_global_stack(true);
4351
4352
// Read the note on the claim_region() method on why it might
4353
// return NULL with potentially more regions available for
4354
// claiming and why we have to check out_of_regions() to determine
4355
// whether we're done or not.
4356
while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4357
// We are going to try to claim a new region. We should have
4358
// given up on the previous one.
4359
// Separated the asserts so that we know which one fires.
4360
assert(_curr_region == NULL, "invariant");
4361
assert(_finger == NULL, "invariant");
4362
assert(_region_limit == NULL, "invariant");
4363
if (_cm->verbose_low()) {
4364
gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4365
}
4366
HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4367
if (claimed_region != NULL) {
4368
// Yes, we managed to claim one
4369
statsOnly( ++_regions_claimed );
4370
4371
if (_cm->verbose_low()) {
4372
gclog_or_tty->print_cr("[%u] we successfully claimed "
4373
"region " PTR_FORMAT,
4374
_worker_id, p2i(claimed_region));
4375
}
4376
4377
setup_for_region(claimed_region);
4378
assert(_curr_region == claimed_region, "invariant");
4379
}
4380
// It is important to call the regular clock here. It might take
4381
// a while to claim a region if, for example, we hit a large
4382
// block of empty regions. So we need to call the regular clock
4383
// method once round the loop to make sure it's called
4384
// frequently enough.
4385
regular_clock_call();
4386
}
4387
4388
if (!has_aborted() && _curr_region == NULL) {
4389
assert(_cm->out_of_regions(),
4390
"at this point we should be out of regions");
4391
}
4392
} while ( _curr_region != NULL && !has_aborted());
4393
4394
if (!has_aborted()) {
4395
// We cannot check whether the global stack is empty, since other
4396
// tasks might be pushing objects to it concurrently.
4397
assert(_cm->out_of_regions(),
4398
"at this point we should be out of regions");
4399
4400
if (_cm->verbose_low()) {
4401
gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4402
}
4403
4404
// Try to reduce the number of available SATB buffers so that
4405
// remark has less work to do.
4406
drain_satb_buffers();
4407
}
4408
4409
// Since we've done everything else, we can now totally drain the
4410
// local queue and global stack.
4411
drain_local_queue(false);
4412
drain_global_stack(false);
4413
4414
// Attempt at work stealing from other task's queues.
4415
if (do_stealing && !has_aborted()) {
4416
// We have not aborted. This means that we have finished all that
4417
// we could. Let's try to do some stealing...
4418
4419
// We cannot check whether the global stack is empty, since other
4420
// tasks might be pushing objects to it concurrently.
4421
assert(_cm->out_of_regions() && _task_queue->size() == 0,
4422
"only way to reach here");
4423
4424
if (_cm->verbose_low()) {
4425
gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4426
}
4427
4428
while (!has_aborted()) {
4429
oop obj;
4430
statsOnly( ++_steal_attempts );
4431
4432
if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4433
if (_cm->verbose_medium()) {
4434
gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
4435
_worker_id, p2i((void*) obj));
4436
}
4437
4438
statsOnly( ++_steals );
4439
4440
scan_object(obj);
4441
4442
// And since we're towards the end, let's totally drain the
4443
// local queue and global stack.
4444
drain_local_queue(false);
4445
drain_global_stack(false);
4446
} else {
4447
break;
4448
}
4449
}
4450
}
4451
4452
// If we are about to wrap up and go into termination, check if we
4453
// should raise the overflow flag.
4454
if (do_termination && !has_aborted()) {
4455
if (_cm->force_overflow()->should_force()) {
4456
_cm->set_has_overflown();
4457
regular_clock_call();
4458
}
4459
}
4460
4461
// We still haven't aborted. Now, let's try to get into the
4462
// termination protocol.
4463
if (do_termination && !has_aborted()) {
4464
// We cannot check whether the global stack is empty, since other
4465
// tasks might be concurrently pushing objects on it.
4466
// Separated the asserts so that we know which one fires.
4467
assert(_cm->out_of_regions(), "only way to reach here");
4468
assert(_task_queue->size() == 0, "only way to reach here");
4469
4470
if (_cm->verbose_low()) {
4471
gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4472
}
4473
4474
_termination_start_time_ms = os::elapsedVTime() * 1000.0;
4475
4476
// The CMTask class also extends the TerminatorTerminator class,
4477
// hence its should_exit_termination() method will also decide
4478
// whether to exit the termination protocol or not.
4479
bool finished = (is_serial ||
4480
_cm->terminator()->offer_termination(this));
4481
double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4482
_termination_time_ms +=
4483
termination_end_time_ms - _termination_start_time_ms;
4484
4485
if (finished) {
4486
// We're all done.
4487
4488
if (_worker_id == 0) {
4489
// let's allow task 0 to do this
4490
if (concurrent()) {
4491
assert(_cm->concurrent_marking_in_progress(), "invariant");
4492
// we need to set this to false before the next
4493
// safepoint. This way we ensure that the marking phase
4494
// doesn't observe any more heap expansions.
4495
_cm->clear_concurrent_marking_in_progress();
4496
}
4497
}
4498
4499
// We can now guarantee that the global stack is empty, since
4500
// all other tasks have finished. We separated the guarantees so
4501
// that, if a condition is false, we can immediately find out
4502
// which one.
4503
guarantee(_cm->out_of_regions(), "only way to reach here");
4504
guarantee(_cm->mark_stack_empty(), "only way to reach here");
4505
guarantee(_task_queue->size() == 0, "only way to reach here");
4506
guarantee(!_cm->has_overflown(), "only way to reach here");
4507
guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4508
4509
if (_cm->verbose_low()) {
4510
gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4511
}
4512
} else {
4513
// Apparently there's more work to do. Let's abort this task. The caller
4514
// will restart it and we can hopefully find more things to do.
4515
4516
if (_cm->verbose_low()) {
4517
gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4518
_worker_id);
4519
}
4520
4521
set_has_aborted();
4522
statsOnly( ++_aborted_termination );
4523
}
4524
}
4525
4526
// Mainly for debugging purposes to make sure that a pointer to the
4527
// closure which was statically allocated in this frame doesn't
4528
// escape it by accident.
4529
set_cm_oop_closure(NULL);
4530
double end_time_ms = os::elapsedVTime() * 1000.0;
4531
double elapsed_time_ms = end_time_ms - _start_time_ms;
4532
// Update the step history.
4533
_step_times_ms.add(elapsed_time_ms);
4534
4535
if (has_aborted()) {
4536
// The task was aborted for some reason.
4537
4538
statsOnly( ++_aborted );
4539
4540
if (_has_timed_out) {
4541
double diff_ms = elapsed_time_ms - _time_target_ms;
4542
// Keep statistics of how well we did with respect to hitting
4543
// our target only if we actually timed out (if we aborted for
4544
// other reasons, then the results might get skewed).
4545
_marking_step_diffs_ms.add(diff_ms);
4546
}
4547
4548
if (_cm->has_overflown()) {
4549
// This is the interesting one. We aborted because a global
4550
// overflow was raised. This means we have to restart the
4551
// marking phase and start iterating over regions. However, in
4552
// order to do this we have to make sure that all tasks stop
4553
// what they are doing and re-initialise in a safe manner. We
4554
// will achieve this with the use of two barrier sync points.
4555
4556
if (_cm->verbose_low()) {
4557
gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4558
}
4559
4560
if (!is_serial) {
4561
// We only need to enter the sync barrier if being called
4562
// from a parallel context
4563
_cm->enter_first_sync_barrier(_worker_id);
4564
4565
// When we exit this sync barrier we know that all tasks have
4566
// stopped doing marking work. So, it's now safe to
4567
// re-initialise our data structures. At the end of this method,
4568
// task 0 will clear the global data structures.
4569
}
4570
4571
statsOnly( ++_aborted_overflow );
4572
4573
// We clear the local state of this task...
4574
clear_region_fields();
4575
4576
if (!is_serial) {
4577
// ...and enter the second barrier.
4578
_cm->enter_second_sync_barrier(_worker_id);
4579
}
4580
// At this point, if we're during the concurrent phase of
4581
// marking, everything has been re-initialized and we're
4582
// ready to restart.
4583
}
4584
4585
if (_cm->verbose_low()) {
4586
gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4587
"elapsed = %1.2lfms <<<<<<<<<<",
4588
_worker_id, _time_target_ms, elapsed_time_ms);
4589
if (_cm->has_aborted()) {
4590
gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4591
_worker_id);
4592
}
4593
}
4594
} else {
4595
if (_cm->verbose_low()) {
4596
gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4597
"elapsed = %1.2lfms <<<<<<<<<<",
4598
_worker_id, _time_target_ms, elapsed_time_ms);
4599
}
4600
}
4601
4602
_claimed = false;
4603
}
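
// Hedged sketch (not part of the original sources) of the driver loop that
// the block comment above do_marking_step() describes, i.e. calling the
// method repeatedly until it completes without aborting. the_task and cm are
// hypothetical handles; the real callers are the concurrent marking task,
// the remark task and the reference processing closures.
//
//   do {
//     double time_target_ms = 10.0;             // illustrative time target
//     the_task->do_marking_step(time_target_ms,
//                               true  /* do_termination */,
//                               false /* is_serial    */);
//     // On abort the caller typically yields and/or syncs up with the
//     // other workers before calling do_marking_step() again.
//   } while (the_task->has_aborted() && !cm->has_aborted());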
4604
4605
CMTask::CMTask(uint worker_id,
4606
ConcurrentMark* cm,
4607
size_t* marked_bytes,
4608
BitMap* card_bm,
4609
CMTaskQueue* task_queue,
4610
CMTaskQueueSet* task_queues)
4611
: _g1h(G1CollectedHeap::heap()),
4612
_worker_id(worker_id), _cm(cm),
4613
_objArray_processor(this),
4614
_claimed(false),
4615
_nextMarkBitMap(NULL), _hash_seed(17),
4616
_task_queue(task_queue),
4617
_task_queues(task_queues),
4618
_cm_oop_closure(NULL),
4619
_marked_bytes_array(marked_bytes),
4620
_card_bm(card_bm) {
4621
guarantee(task_queue != NULL, "invariant");
4622
guarantee(task_queues != NULL, "invariant");
4623
4624
statsOnly( _clock_due_to_scanning = 0;
4625
_clock_due_to_marking = 0 );
4626
4627
_marking_step_diffs_ms.add(0.5);
4628
}
4629
4630
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX             "###"

#define G1PPRL_ADDR_BASE_FORMAT        " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT      " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_BYTE_FORMAT             " " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT         " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    " " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

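// For illustration only (the values and exact column widths below are made
// up), a header line plus one per-region line built from these macros looks
// roughly like:
//
//   ###  type         address-range          used  prev-live  next-live ...
//   ###   OLD  0x00000000f0000000-0x00000000f0100000  1048576  1048576 ...
//
// Every line starts with G1PPRL_LINE_PREFIX so the region liveness dump can
// be grepped out of a larger GC log.
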
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}

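// Hedged usage note: this closure is intended to be driven by a heap region
// iteration, e.g. (sketch only, not necessarily the exact call site):
//
//   if (G1PrintRegionLivenessInfo) {
//     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//     _g1h->heap_region_iterate(&cl);
//   }
//
// The constructor prints the header, doHeapRegion() prints one line per
// region, and the destructor prints the summary footer.
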
// It takes as a parameter a pointer to one of the _hum_* fields, deduces
// the corresponding value for a region in a humongous region series
// (either the region size, or what's left if the _hum_* field is < the
// region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

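// Worked example (illustrative numbers): with HeapRegion::GrainBytes == 1M
// and *hum_bytes initially holding 2.5M, three consecutive calls return 1M,
// 1M and 0.5M, leaving *hum_bytes at 0; any further calls return 0. This is
// how a humongous series' totals get spread over the regions that make it up.
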
// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and we visit the regions in address order.
void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
                                                     size_t* capacity_bytes,
                                                     size_t* prev_live_bytes,
                                                     size_t* next_live_bytes) {
  assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
  *used_bytes      = get_hum_bytes(&_hum_used_bytes);
  *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
  *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
  *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
}

bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t prev_live_bytes = r->live_bytes();
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();

  if (r->startsHumongous()) {
    assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
           _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
           "they should have been zeroed after the last time we used them");
    // Set up the _hum_* fields.
    _hum_capacity_bytes  = capacity_bytes;
    _hum_used_bytes      = used_bytes;
    _hum_prev_live_bytes = prev_live_bytes;
    _hum_next_live_bytes = next_live_bytes;
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    end = bottom + HeapRegion::GrainWords;
  } else if (r->continuesHumongous()) {
    get_hum_bytes(&used_bytes, &capacity_bytes,
                  &prev_live_bytes, &next_live_bytes);
    assert(end == bottom + HeapRegion::GrainWords, "invariant");
  }

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_FORMAT
                 G1PPRL_ADDR_BASE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, p2i(bottom), p2i(end),
                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
                 remset_bytes, strong_code_roots_bytes);

  return false;
}

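// Note: returning false above tells the heap region iteration to continue,
// so every region gets its own line; returning true would terminate the
// iteration early.
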
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // Add static memory usages to the remembered set sizes.
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}

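// For illustration only (all numbers are made up), the footer printed by the
// destructor above looks roughly like:
//
//   ###
//   ### SUMMARY capacity: 4096.00 MB used: 1234.56 MB / 30.14 %
//       prev-live: 1100.00 MB / 26.86 % next-live: 900.00 MB / 21.97 %
//       remset: 12.34 MB code-roots: 0.56 MB
//
// (in the real output the whole summary is a single "###"-prefixed line).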