// Path: blob/master/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp
// 66645 views
/*1* Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "jfr/jfrEvents.hpp"26#include "jfr/recorder/jfrRecorder.hpp"27#include "jfr/periodic/sampling/jfrCallTrace.hpp"28#include "jfr/periodic/sampling/jfrThreadSampler.hpp"29#include "jfr/recorder/service/jfrOptionSet.hpp"30#include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp"31#include "jfr/support/jfrThreadId.hpp"32#include "jfr/support/jfrThreadLocal.hpp"33#include "jfr/utilities/jfrTime.hpp"34#include "jfrfiles/jfrEventClasses.hpp"35#include "logging/log.hpp"36#include "runtime/frame.inline.hpp"37#include "runtime/os.hpp"38#include "runtime/semaphore.hpp"39#include "runtime/thread.inline.hpp"40#include "runtime/threadSMR.hpp"4142enum JfrSampleType {43NO_SAMPLE = 0,44JAVA_SAMPLE = 1,45NATIVE_SAMPLE = 246};4748static bool thread_state_in_java(JavaThread* thread) {49assert(thread != NULL, "invariant");50switch(thread->thread_state()) {51case _thread_new:52case _thread_uninitialized:53case 
_thread_new_trans:54case _thread_in_vm_trans:55case _thread_blocked_trans:56case _thread_in_native_trans:57case _thread_blocked:58case _thread_in_vm:59case _thread_in_native:60case _thread_in_Java_trans:61break;62case _thread_in_Java:63return true;64default:65ShouldNotReachHere();66break;67}68return false;69}7071static bool thread_state_in_native(JavaThread* thread) {72assert(thread != NULL, "invariant");73switch(thread->thread_state()) {74case _thread_new:75case _thread_uninitialized:76case _thread_new_trans:77case _thread_blocked_trans:78case _thread_blocked:79case _thread_in_vm:80case _thread_in_vm_trans:81case _thread_in_Java_trans:82case _thread_in_Java:83case _thread_in_native_trans:84break;85case _thread_in_native:86return true;87default:88ShouldNotReachHere();89break;90}91return false;92}9394class JfrThreadSampleClosure {95public:96JfrThreadSampleClosure(EventExecutionSample* events, EventNativeMethodSample* events_native);97~JfrThreadSampleClosure() {}98EventExecutionSample* next_event() { return &_events[_added_java++]; }99EventNativeMethodSample* next_event_native() { return &_events_native[_added_native++]; }100void commit_events(JfrSampleType type);101bool do_sample_thread(JavaThread* thread, JfrStackFrame* frames, u4 max_frames, JfrSampleType type);102uint java_entries() { return _added_java; }103uint native_entries() { return _added_native; }104105private:106bool sample_thread_in_java(JavaThread* thread, JfrStackFrame* frames, u4 max_frames);107bool sample_thread_in_native(JavaThread* thread, JfrStackFrame* frames, u4 max_frames);108EventExecutionSample* _events;109EventNativeMethodSample* _events_native;110Thread* _self;111uint _added_java;112uint _added_native;113};114115class OSThreadSampler : public os::SuspendedThreadTask {116public:117OSThreadSampler(JavaThread* thread,118JfrThreadSampleClosure& closure,119JfrStackFrame *frames,120u4 max_frames) : 
os::SuspendedThreadTask((Thread*)thread),121_success(false),122_thread_oop(thread->threadObj()),123_stacktrace(frames, max_frames),124_closure(closure),125_suspend_time() {}126127void take_sample();128void do_task(const os::SuspendedThreadTaskContext& context);129void protected_task(const os::SuspendedThreadTaskContext& context);130bool success() const { return _success; }131const JfrStackTrace& stacktrace() const { return _stacktrace; }132133private:134bool _success;135oop _thread_oop;136JfrStackTrace _stacktrace;137JfrThreadSampleClosure& _closure;138JfrTicks _suspend_time;139};140141class OSThreadSamplerCallback : public os::CrashProtectionCallback {142public:143OSThreadSamplerCallback(OSThreadSampler& sampler, const os::SuspendedThreadTaskContext &context) :144_sampler(sampler), _context(context) {145}146virtual void call() {147_sampler.protected_task(_context);148}149private:150OSThreadSampler& _sampler;151const os::SuspendedThreadTaskContext& _context;152};153154void OSThreadSampler::do_task(const os::SuspendedThreadTaskContext& context) {155#ifndef ASSERT156guarantee(JfrOptionSet::sample_protection(), "Sample Protection should be on in product builds");157#endif158assert(_suspend_time.value() == 0, "already timestamped!");159_suspend_time = JfrTicks::now();160161if (JfrOptionSet::sample_protection()) {162OSThreadSamplerCallback cb(*this, context);163os::ThreadCrashProtection crash_protection;164if (!crash_protection.call(cb)) {165log_error(jfr)("Thread method sampler crashed");166}167} else {168protected_task(context);169}170}171172/*173* From this method and down the call tree we attempt to protect against crashes174* using a signal handler / __try block. Don't take locks, rely on destructors or175* leave memory (in case of signal / exception) in an inconsistent state. 
*/176void OSThreadSampler::protected_task(const os::SuspendedThreadTaskContext& context) {177JavaThread* jth = context.thread()->as_Java_thread();178// Skip sample if we signaled a thread that moved to other state179if (!thread_state_in_java(jth)) {180return;181}182JfrGetCallTrace trace(true, jth);183frame topframe;184if (trace.get_topframe(context.ucontext(), topframe)) {185if (_stacktrace.record_thread(*jth, topframe)) {186/* If we managed to get a topframe and a stacktrace, create an event187* and put it into our array. We can't call Jfr::_stacktraces.add()188* here since it would allocate memory using malloc. Doing so while189* the stopped thread is inside malloc would deadlock. */190_success = true;191EventExecutionSample *ev = _closure.next_event();192ev->set_starttime(_suspend_time);193ev->set_endtime(_suspend_time); // fake to not take an end time194ev->set_sampledThread(JFR_THREAD_ID(jth));195ev->set_state(static_cast<u8>(java_lang_Thread::get_thread_status(_thread_oop)));196}197}198}199200void OSThreadSampler::take_sample() {201run();202}203204class JfrNativeSamplerCallback : public os::CrashProtectionCallback {205public:206JfrNativeSamplerCallback(JfrThreadSampleClosure& closure, JavaThread* jt, JfrStackFrame* frames, u4 max_frames) :207_closure(closure), _jt(jt), _thread_oop(jt->threadObj()), _stacktrace(frames, max_frames), _success(false) {208}209virtual void call();210bool success() { return _success; }211JfrStackTrace& stacktrace() { return _stacktrace; }212213private:214JfrThreadSampleClosure& _closure;215JavaThread* _jt;216oop _thread_oop;217JfrStackTrace _stacktrace;218bool _success;219};220221static void write_native_event(JfrThreadSampleClosure& closure, JavaThread* jt, oop thread_oop) {222EventNativeMethodSample *ev = closure.next_event_native();223ev->set_starttime(JfrTicks::now());224ev->set_sampledThread(JFR_THREAD_ID(jt));225ev->set_state(static_cast<u8>(java_lang_Thread::get_thread_status(thread_oop)));226}227228void 
JfrNativeSamplerCallback::call() {229// When a thread is only attach it will be native without a last java frame230if (!_jt->has_last_Java_frame()) {231return;232}233234frame topframe = _jt->last_frame();235frame first_java_frame;236Method* method = NULL;237JfrGetCallTrace gct(false, _jt);238if (!gct.find_top_frame(topframe, &method, first_java_frame)) {239return;240}241if (method == NULL) {242return;243}244topframe = first_java_frame;245_success = _stacktrace.record_thread(*_jt, topframe);246if (_success) {247write_native_event(_closure, _jt, _thread_oop);248}249}250251bool JfrThreadSampleClosure::sample_thread_in_java(JavaThread* thread, JfrStackFrame* frames, u4 max_frames) {252OSThreadSampler sampler(thread, *this, frames, max_frames);253sampler.take_sample();254/* We don't want to allocate any memory using malloc/etc while the thread255* is stopped, so everything is stored in stack allocated memory until this256* point where the thread has been resumed again, if the sampling was a success257* we need to store the stacktrace in the stacktrace repository and update258* the event with the id that was returned. 
*/259if (!sampler.success()) {260return false;261}262EventExecutionSample *event = &_events[_added_java - 1];263traceid id = JfrStackTraceRepository::add(sampler.stacktrace());264assert(id != 0, "Stacktrace id should not be 0");265event->set_stackTrace(id);266return true;267}268269bool JfrThreadSampleClosure::sample_thread_in_native(JavaThread* thread, JfrStackFrame* frames, u4 max_frames) {270JfrNativeSamplerCallback cb(*this, thread, frames, max_frames);271if (JfrOptionSet::sample_protection()) {272os::ThreadCrashProtection crash_protection;273if (!crash_protection.call(cb)) {274log_error(jfr)("Thread method sampler crashed for native");275}276} else {277cb.call();278}279if (!cb.success()) {280return false;281}282EventNativeMethodSample *event = &_events_native[_added_native - 1];283traceid id = JfrStackTraceRepository::add(cb.stacktrace());284assert(id != 0, "Stacktrace id should not be 0");285event->set_stackTrace(id);286return true;287}288289static const uint MAX_NR_OF_JAVA_SAMPLES = 5;290static const uint MAX_NR_OF_NATIVE_SAMPLES = 1;291292void JfrThreadSampleClosure::commit_events(JfrSampleType type) {293if (JAVA_SAMPLE == type) {294assert(_added_java > 0 && _added_java <= MAX_NR_OF_JAVA_SAMPLES, "invariant");295for (uint i = 0; i < _added_java; ++i) {296_events[i].commit();297}298} else {299assert(NATIVE_SAMPLE == type, "invariant");300assert(_added_native > 0 && _added_native <= MAX_NR_OF_NATIVE_SAMPLES, "invariant");301for (uint i = 0; i < _added_native; ++i) {302_events_native[i].commit();303}304}305}306307JfrThreadSampleClosure::JfrThreadSampleClosure(EventExecutionSample* events, EventNativeMethodSample* events_native) :308_events(events),309_events_native(events_native),310_self(Thread::current()),311_added_java(0),312_added_native(0) {313}314315class JfrThreadSampler : public NonJavaThread {316friend class JfrThreadSampling;317private:318Semaphore _sample;319Thread* _sampler_thread;320JfrStackFrame* const _frames;321JavaThread* 
_last_thread_java;322JavaThread* _last_thread_native;323size_t _interval_java;324size_t _interval_native;325int _cur_index;326const u4 _max_frames;327volatile bool _disenrolled;328329JavaThread* next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current);330void task_stacktrace(JfrSampleType type, JavaThread** last_thread);331JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames);332~JfrThreadSampler();333334void start_thread();335336void enroll();337void disenroll();338void set_java_interval(size_t interval) { _interval_java = interval; };339void set_native_interval(size_t interval) { _interval_native = interval; };340size_t get_java_interval() { return _interval_java; };341size_t get_native_interval() { return _interval_native; };342protected:343virtual void post_run();344public:345virtual char* name() const { return (char*)"JFR Thread Sampler"; }346bool is_JfrSampler_thread() const { return true; }347void run();348static Monitor* transition_block() { return JfrThreadSampler_lock; }349static void on_javathread_suspend(JavaThread* thread);350};351352static void clear_transition_block(JavaThread* jt) {353assert(Threads_lock->owned_by_self(), "Holding the thread table lock.");354jt->clear_trace_flag();355JfrThreadLocal* const tl = jt->jfr_thread_local();356MutexLocker ml(JfrThreadSampler::transition_block(), Mutex::_no_safepoint_check_flag);357if (tl->is_trace_block()) {358JfrThreadSampler::transition_block()->notify();359}360}361362static bool is_excluded(JavaThread* thread) {363assert(thread != NULL, "invariant");364return thread->is_hidden_from_external_view() || thread->in_deopt_handler() || thread->jfr_thread_local()->is_excluded();365}366367bool JfrThreadSampleClosure::do_sample_thread(JavaThread* thread, JfrStackFrame* frames, u4 max_frames, JfrSampleType type) {368assert(Threads_lock->owned_by_self(), "Holding the thread table lock.");369if (is_excluded(thread)) {370return false;371}372373bool ret = 
false;374thread->set_trace_flag(); // Provides StoreLoad, needed to keep read of thread state from floating up.375if (JAVA_SAMPLE == type) {376if (thread_state_in_java(thread)) {377ret = sample_thread_in_java(thread, frames, max_frames);378}379} else {380assert(NATIVE_SAMPLE == type, "invariant");381if (thread_state_in_native(thread)) {382ret = sample_thread_in_native(thread, frames, max_frames);383}384}385clear_transition_block(thread);386return ret;387}388389JfrThreadSampler::JfrThreadSampler(size_t interval_java, size_t interval_native, u4 max_frames) :390_sample(),391_sampler_thread(NULL),392_frames(JfrCHeapObj::new_array<JfrStackFrame>(max_frames)),393_last_thread_java(NULL),394_last_thread_native(NULL),395_interval_java(interval_java),396_interval_native(interval_native),397_cur_index(-1),398_max_frames(max_frames),399_disenrolled(true) {400}401402JfrThreadSampler::~JfrThreadSampler() {403JfrCHeapObj::free(_frames, sizeof(JfrStackFrame) * _max_frames);404}405406static inline bool is_released(JavaThread* jt) {407return !jt->is_trace_suspend();408}409410void JfrThreadSampler::on_javathread_suspend(JavaThread* thread) {411if (is_released(thread)) {412return;413}414JfrThreadLocal* const tl = thread->jfr_thread_local();415MonitorLocker ml(transition_block(), Mutex::_no_safepoint_check_flag);416tl->set_trace_block();417while (!is_released(thread)) {418ml.wait();419}420tl->clear_trace_block();421}422423JavaThread* JfrThreadSampler::next_thread(ThreadsList* t_list, JavaThread* first_sampled, JavaThread* current) {424assert(t_list != NULL, "invariant");425assert(Threads_lock->owned_by_self(), "Holding the thread table lock.");426assert(_cur_index >= -1 && (uint)_cur_index + 1 <= t_list->length(), "invariant");427assert((current == NULL && -1 == _cur_index) || (t_list->find_index_of_JavaThread(current) == _cur_index), "invariant");428if ((uint)_cur_index + 1 == t_list->length()) {429// wrap430_cur_index = 0;431} else {432_cur_index++;433}434assert(_cur_index >= 0 && 
(uint)_cur_index < t_list->length(), "invariant");435JavaThread* const next = t_list->thread_at(_cur_index);436return next != first_sampled ? next : NULL;437}438439void JfrThreadSampler::start_thread() {440if (os::create_thread(this, os::os_thread)) {441os::start_thread(this);442} else {443log_error(jfr)("Failed to create thread for thread sampling");444}445}446447void JfrThreadSampler::enroll() {448if (_disenrolled) {449log_trace(jfr)("Enrolling thread sampler");450_sample.signal();451_disenrolled = false;452}453}454455void JfrThreadSampler::disenroll() {456if (!_disenrolled) {457_sample.wait();458_disenrolled = true;459log_trace(jfr)("Disenrolling thread sampler");460}461}462463static jlong get_monotonic_ms() {464return os::javaTimeNanos() / 1000000;465}466467void JfrThreadSampler::run() {468assert(_sampler_thread == NULL, "invariant");469470_sampler_thread = this;471472jlong last_java_ms = get_monotonic_ms();473jlong last_native_ms = last_java_ms;474while (true) {475if (!_sample.trywait()) {476// disenrolled477_sample.wait();478last_java_ms = get_monotonic_ms();479last_native_ms = last_java_ms;480}481_sample.signal();482jlong java_interval = _interval_java == 0 ? max_jlong : MAX2<jlong>(_interval_java, 1);483jlong native_interval = _interval_native == 0 ? max_jlong : MAX2<jlong>(_interval_native, 1);484485jlong now_ms = get_monotonic_ms();486487/*488* Let I be java_interval or native_interval.489* Let L be last_java_ms or last_native_ms.490* Let N be now_ms.491*492* Interval, I, might be max_jlong so the addition493* could potentially overflow without parenthesis (UB). Also note that494* L - N < 0. 
Avoid UB, by adding parenthesis.495*/496jlong next_j = java_interval + (last_java_ms - now_ms);497jlong next_n = native_interval + (last_native_ms - now_ms);498499jlong sleep_to_next = MIN2<jlong>(next_j, next_n);500501if (sleep_to_next > 0) {502os::naked_short_sleep(sleep_to_next);503}504505if ((next_j - sleep_to_next) <= 0) {506task_stacktrace(JAVA_SAMPLE, &_last_thread_java);507last_java_ms = get_monotonic_ms();508}509if ((next_n - sleep_to_next) <= 0) {510task_stacktrace(NATIVE_SAMPLE, &_last_thread_native);511last_native_ms = get_monotonic_ms();512}513}514}515516void JfrThreadSampler::post_run() {517this->NonJavaThread::post_run();518delete this;519}520521522void JfrThreadSampler::task_stacktrace(JfrSampleType type, JavaThread** last_thread) {523ResourceMark rm;524EventExecutionSample samples[MAX_NR_OF_JAVA_SAMPLES];525EventNativeMethodSample samples_native[MAX_NR_OF_NATIVE_SAMPLES];526JfrThreadSampleClosure sample_task(samples, samples_native);527528const uint sample_limit = JAVA_SAMPLE == type ? MAX_NR_OF_JAVA_SAMPLES : MAX_NR_OF_NATIVE_SAMPLES;529uint num_samples = 0;530JavaThread* start = NULL;531532{533elapsedTimer sample_time;534sample_time.start();535{536MutexLocker tlock(Threads_lock);537ThreadsListHandle tlh;538// Resolve a sample session relative start position index into the thread list array.539// In cases where the last sampled thread is NULL or not-NULL but stale, find_index() returns -1.540_cur_index = tlh.list()->find_index_of_JavaThread(*last_thread);541JavaThread* current = _cur_index != -1 ? 
*last_thread : NULL;542543while (num_samples < sample_limit) {544current = next_thread(tlh.list(), start, current);545if (current == NULL) {546break;547}548if (start == NULL) {549start = current; // remember the thread where we started to attempt sampling550}551if (current->is_Compiler_thread()) {552continue;553}554if (sample_task.do_sample_thread(current, _frames, _max_frames, type)) {555num_samples++;556}557}558*last_thread = current; // remember the thread we last attempted to sample559}560sample_time.stop();561log_trace(jfr)("JFR thread sampling done in %3.7f secs with %d java %d native samples",562sample_time.seconds(), sample_task.java_entries(), sample_task.native_entries());563}564if (num_samples > 0) {565sample_task.commit_events(type);566}567}568569static JfrThreadSampling* _instance = NULL;570571JfrThreadSampling& JfrThreadSampling::instance() {572return *_instance;573}574575JfrThreadSampling* JfrThreadSampling::create() {576assert(_instance == NULL, "invariant");577_instance = new JfrThreadSampling();578return _instance;579}580581void JfrThreadSampling::destroy() {582if (_instance != NULL) {583delete _instance;584_instance = NULL;585}586}587588JfrThreadSampling::JfrThreadSampling() : _sampler(NULL) {}589590JfrThreadSampling::~JfrThreadSampling() {591if (_sampler != NULL) {592_sampler->disenroll();593}594}595596static void log(size_t interval_java, size_t interval_native) {597log_trace(jfr)("Updated thread sampler for java: " SIZE_FORMAT " ms, native " SIZE_FORMAT " ms", interval_java, interval_native);598}599600void JfrThreadSampling::start_sampler(size_t interval_java, size_t interval_native) {601assert(_sampler == NULL, "invariant");602log_trace(jfr)("Enrolling thread sampler");603_sampler = new JfrThreadSampler(interval_java, interval_native, JfrOptionSet::stackdepth());604_sampler->start_thread();605_sampler->enroll();606}607608void JfrThreadSampling::set_sampling_interval(bool java_interval, size_t period) {609size_t interval_java = 0;610size_t 
interval_native = 0;611if (_sampler != NULL) {612interval_java = _sampler->get_java_interval();613interval_native = _sampler->get_native_interval();614}615if (java_interval) {616interval_java = period;617} else {618interval_native = period;619}620if (interval_java > 0 || interval_native > 0) {621if (_sampler == NULL) {622log_trace(jfr)("Creating thread sampler for java:%zu ms, native %zu ms", interval_java, interval_native);623start_sampler(interval_java, interval_native);624} else {625_sampler->set_java_interval(interval_java);626_sampler->set_native_interval(interval_native);627_sampler->enroll();628}629assert(_sampler != NULL, "invariant");630log(interval_java, interval_native);631} else if (_sampler != NULL) {632_sampler->disenroll();633}634}635636void JfrThreadSampling::set_java_sample_interval(size_t period) {637if (_instance == NULL && 0 == period) {638return;639}640instance().set_sampling_interval(true, period);641}642643void JfrThreadSampling::set_native_sample_interval(size_t period) {644if (_instance == NULL && 0 == period) {645return;646}647instance().set_sampling_interval(false, period);648}649650void JfrThreadSampling::on_javathread_suspend(JavaThread* thread) {651JfrThreadSampler::on_javathread_suspend(thread);652}653654655