// Path: blob/master/src/hotspot/share/gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.cpp
// (40975 views)
/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"

#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
#include "utilities/quickSort.hpp"

// These constants are used to adjust the margin of error for the moving
// average of the allocation rate and cycle time. The units are standard
// deviations.
const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;

// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle.
const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;

// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
// the estimate (average cycle time or allocation rate) is not more than
// MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%

// Start with the configured initial confidence/spike thresholds; both are
// tightened or relaxed over time based on GC outcomes (see the adjust_* methods).
ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics() :
  ShenandoahHeuristics(),
  _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
  _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
  _last_trigger(OTHER) { }

ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}

// Select regions for the collection set from the candidates in 'data'
// (of length 'size'), given 'actual_free' bytes of free space.
// Regions are taken garbage-first, bounded by max_cset and min_garbage
// as explained in the comment below.
void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
                                                                         RegionData* data, size_t size,
                                                                         size_t actual_free) {
  size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;

  // The logic for cset selection in adaptive is as follows:
  //
  //   1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
  //      during evacuation, and thus guarantee full GC. In practice, we also want to let
  //      application to allocate something. This is why we limit CSet to some fraction of
  //      available space. In non-overloaded heap, max_cset would contain all plausible candidates
  //      over garbage threshold.
  //
  //   2. We should not get cset too low so that free threshold would not be met right
  //      after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
  //      too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
  //
  // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
  // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
  // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
  // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.

  size_t capacity    = ShenandoahHeap::heap()->soft_max_capacity();
  size_t max_cset    = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
  size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
  size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);

  log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: "
                     SIZE_FORMAT "%s, Max CSet: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s",
                     byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
                     byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
                     byte_size_in_proper_unit(max_cset),    proper_unit_for_byte_size(max_cset),
                     byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));

  // Better select garbage-first regions
  QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);

  size_t cur_cset = 0;     // live bytes accumulated into the cset so far
  size_t cur_garbage = 0;  // garbage bytes accumulated into the cset so far

  for (size_t idx = 0; idx < size; idx++) {
    ShenandoahHeapRegion* r = data[idx]._region;

    size_t new_cset    = cur_cset + r->get_live_data_bytes();
    size_t new_garbage = cur_garbage + r->garbage();

    // Hard cap: adding this region would overflow the evacuation reserve.
    if (new_cset > max_cset) {
      break;
    }

    // Take the region if we are still below min_garbage (unconditional phase),
    // or if it clears the per-region garbage threshold (soft-threshold phase).
    if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
      cset->add_region(r);
      cur_cset = new_cset;
      cur_garbage = new_garbage;
    }
  }
}

// Reset the allocation-rate sampler at the start of each cycle, since
// bytes_allocated_since_gc_start restarts from zero.
void ShenandoahAdaptiveHeuristics::record_cycle_start() {
  ShenandoahHeuristics::record_cycle_start();
  _allocation_rate.allocation_counter_reset();
}

// After a successful concurrent cycle, compare the memory available at cycle
// end against its running average, and nudge the last trigger's parameters
// proportionally to the z-score when the outcome was unusually good or bad.
void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
  ShenandoahHeuristics::record_success_concurrent();

  size_t available = ShenandoahHeap::heap()->free_set()->available();

  _available.add(available);
  double z_score = 0.0;
  if (_available.sd() > 0) {
    z_score = (available - _available.avg()) / _available.sd();
  }

  log_debug(gc, ergo)("Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
                      byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                      z_score,
                      byte_size_in_proper_unit(_available.avg()), proper_unit_for_byte_size(_available.avg()),
                      byte_size_in_proper_unit(_available.sd()),  proper_unit_for_byte_size(_available.sd()));

  // In the case when a concurrent GC cycle completes successfully but with an
  // unusually small amount of available memory we will adjust our trigger
  // parameters so that they are more likely to initiate a new cycle.
  // Conversely, when a GC cycle results in an above average amount of available
  // memory, we will adjust the trigger parameters to be less likely to initiate
  // a GC cycle.
  //
  // The z-score we've computed is in no way statistically related to the
  // trigger parameters, but it has the nice property that worse z-scores for
  // available memory indicate making larger adjustments to the trigger
  // parameters. It also results in fewer adjustments as the application
  // stabilizes.
  //
  // In order to avoid making endless and likely unnecessary adjustments to the
  // trigger parameters, the change in available memory (with respect to the
  // average) at the end of a cycle must be beyond these threshold values.
  if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
      z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
    // The sign is flipped because a negative z-score indicates that the
    // available memory at the end of the cycle is below average. Positive
    // adjustments make the triggers more sensitive (i.e., more likely to fire).
    // The z-score also gives us a measure of just how far below normal. This
    // property allows us to adjust the trigger parameters proportionally.
    //
    // The `100` here is used to attenuate the size of our adjustments. This
    // number was chosen empirically. It also means the adjustments at the end of
    // a concurrent cycle are an order of magnitude smaller than the adjustments
    // made for a degenerated or full GC cycle (which themselves were also
    // chosen empirically).
    adjust_last_trigger_parameters(z_score / -100);
  }
}

void ShenandoahAdaptiveHeuristics::record_success_degenerated() {
  ShenandoahHeuristics::record_success_degenerated();
  // Adjust both trigger's parameters in the case of a degenerated GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(DEGENERATE_PENALTY_SD);
  adjust_spike_threshold(DEGENERATE_PENALTY_SD);
}

void ShenandoahAdaptiveHeuristics::record_success_full() {
  ShenandoahHeuristics::record_success_full();
  // Adjust both trigger's parameters in the case of a full GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(FULL_PENALTY_SD);
  adjust_spike_threshold(FULL_PENALTY_SD);
}

// Clamp 'value' into [min, max].
static double saturate(double value, double min, double max) {
  return MAX2(MIN2(value, max), min);
}

// Decide whether to start a concurrent GC cycle. Triggers, in order:
//   1. Free space below the hard minimum threshold.
//   2. Free space below the initial threshold while still in the learning phase.
//   3. Average allocation rate would deplete the allocation headroom within
//      the average cycle time (RATE trigger).
//   4. Instantaneous allocation rate is spiking and would deplete the headroom
//      within the average cycle time (SPIKE trigger).
// Falls back to the base heuristics otherwise.
bool ShenandoahAdaptiveHeuristics::should_start_gc() {
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  size_t max_capacity = heap->max_capacity();
  size_t capacity = heap->soft_max_capacity();
  size_t available = heap->free_set()->available();
  size_t allocated = heap->bytes_allocated_since_gc_start();

  // Make sure the code below treats available without the soft tail.
  size_t soft_tail = max_capacity - capacity;
  available = (available > soft_tail) ? (available - soft_tail) : 0;

  // Track allocation rate even if we decide to start a cycle for other reasons.
  double rate = _allocation_rate.sample(allocated);
  _last_trigger = OTHER;

  size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
  if (available < min_threshold) {
    log_info(gc)("Trigger: Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
                 byte_size_in_proper_unit(available),     proper_unit_for_byte_size(available),
                 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
    return true;
  }

  // While still learning (first ShenandoahLearningSteps cycles), trigger more
  // eagerly off the larger initial free threshold.
  const size_t max_learn = ShenandoahLearningSteps;
  if (_gc_times_learned < max_learn) {
    size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
    if (available < init_threshold) {
      log_info(gc)("Trigger: Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
                   _gc_times_learned + 1, max_learn,
                   byte_size_in_proper_unit(available),      proper_unit_for_byte_size(available),
                   byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
      return true;
    }
  }

  // Check if allocation headroom is still okay. This also factors in:
  //   1. Some space to absorb allocation spikes
  //   2. Accumulated penalties from Degenerated and Full GC
  size_t allocation_headroom = available;

  size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
  size_t penalties      = capacity / 100 * _gc_time_penalties;

  allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
  allocation_headroom -= MIN2(allocation_headroom, penalties);

  // Pad both estimates by _margin_of_error_sd standard deviations; a larger
  // margin makes this trigger more conservative (fires earlier).
  double avg_cycle_time = _gc_time_history->davg() + (_margin_of_error_sd * _gc_time_history->dsd());
  double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
  if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
    log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
                 avg_cycle_time * 1000,
                 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
                 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
                 _margin_of_error_sd);

    log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
                 byte_size_in_proper_unit(available),           proper_unit_for_byte_size(available),
                 byte_size_in_proper_unit(spike_headroom),      proper_unit_for_byte_size(spike_headroom),
                 byte_size_in_proper_unit(penalties),           proper_unit_for_byte_size(penalties),
                 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));

    _last_trigger = RATE;
    return true;
  }

  // Spike trigger: the latest instantaneous sample is far above the average
  // rate AND would itself deplete the headroom within a cycle.
  bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
  if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
    log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
                 avg_cycle_time * 1000,
                 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
                 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
                 _spike_threshold_sd);
    _last_trigger = SPIKE;
    return true;
  }

  return ShenandoahHeuristics::should_start_gc();
}

// Route the adjustment to whichever trigger fired last; a cycle started for
// OTHER reasons leaves both parameters untouched.
void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
  switch (_last_trigger) {
    case RATE:
      adjust_margin_of_error(amount);
      break;
    case SPIKE:
      adjust_spike_threshold(amount);
      break;
    case OTHER:
      // nothing to adjust here.
      break;
    default:
      ShouldNotReachHere();
  }
}

// A positive amount widens the margin of error (more sensitive RATE trigger).
void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
  _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
}

// A positive amount lowers the spike threshold (more sensitive SPIKE trigger);
// note the subtraction, opposite in direction to adjust_margin_of_error.
void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) {
  _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd);
}

// Sampler windows are sized to cover ShenandoahAdaptiveSampleSizeSeconds of
// history at ShenandoahAdaptiveSampleFrequencyHz samples per second.
ShenandoahAllocationRate::ShenandoahAllocationRate() :
  _last_sample_time(os::elapsedTime()),
  _last_sample_value(0),
  _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz),
  _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor),
  _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) {
}

// Record a new allocation-rate sample if at least _interval_sec has elapsed
// since the last one. Returns the instantaneous rate for this sample, or 0.0
// if no sample was taken (interval not elapsed, or the allocation counter
// moved backwards, e.g. after a reset).
double ShenandoahAllocationRate::sample(size_t allocated) {
  double now = os::elapsedTime();
  double rate = 0.0;
  if (now - _last_sample_time > _interval_sec) {
    if (allocated >= _last_sample_value) {
      rate = instantaneous_rate(now, allocated);
      _rate.add(rate);
      _rate_avg.add(_rate.avg());
    }

    _last_sample_time = now;
    _last_sample_value = allocated;
  }
  return rate;
}

// Upper bound of the allocation-rate estimate, 'sds' standard deviations
// above the moving average.
double ShenandoahAllocationRate::upper_bound(double sds) const {
  // Here we are using the standard deviation of the computed running
  // average, rather than the standard deviation of the samples that went
  // into the moving average. This is a much more stable value and is tied
  // to the actual statistic in use (moving average over samples of averages).
  return _rate.davg() + (sds * _rate_avg.dsd());
}

// Re-anchor the sampler; called at cycle start when the heap's per-cycle
// allocation counter restarts from zero.
void ShenandoahAllocationRate::allocation_counter_reset() {
  _last_sample_time = os::elapsedTime();
  _last_sample_value = 0;
}

// A sample is "spiking" when its z-score against the rate history exceeds
// 'threshold'. A non-positive rate (no sample taken) never spikes.
bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
  if (rate <= 0.0) {
    return false;
  }

  double sd = _rate.sd();
  if (sd > 0) {
    // There is a small chance that that rate has already been sampled, but it
    // seems not to matter in practice.
    double z_score = (rate - _rate.avg()) / sd;
    if (z_score > threshold) {
      return true;
    }
  }
  return false;
}

double ShenandoahAllocationRate::instantaneous_rate(size_t allocated) const {
  return instantaneous_rate(os::elapsedTime(), allocated);
}

// Bytes-per-second since the last recorded sample; returns 0 if time has not
// advanced or the counter moved backwards.
double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
  size_t last_value = _last_sample_value;
  double last_time = _last_sample_time;
  size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
  double time_delta_sec = time - last_time;
  return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
}