// Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_itt.h
// 35258 views
#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
extern kmp_int32 __kmp_itt_prepare_delay;
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif

// Simplify the handling of an argument that is only required when USE_ITT_BUILD
// is enabled.
#define USE_ITT_BUILD_ARG(x) , x

void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();

// -----------------------------------------------------------------------------
// New stuff for reporting high-level constructs.

// Note the naming convention:
//   __kmp_itt_xxxing() function should be called before action, while
//   __kmp_itt_xxxed() function should be called after action.

// --- Parallel region reporting ---
__kmp_inline void
__kmp_itt_region_forking(int gtid, int team_size,
                         int barriers); // Primary only, before forking threads.
__kmp_inline void
__kmp_itt_region_joined(int gtid); // Primary only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.

// --- Frame reporting ---
// region=0: no regions, region=1: parallel, region=2: serialized parallel
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);

// --- Barrier reporting ---
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);

// --- Taskwait reporting ---
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
#define KMP_ITT_TASKWAIT_STARTING(obj)                                         \
  if (UNLIKELY(__itt_sync_create_ptr)) {                                       \
    obj = __kmp_itt_taskwait_object(gtid);                                     \
    if (obj != NULL) {                                                         \
      __kmp_itt_taskwait_starting(gtid, obj);                                  \
    }                                                                          \
  }
#define KMP_ITT_TASKWAIT_FINISHED(obj)                                         \
  if (UNLIKELY(obj != NULL))                                                   \
    __kmp_itt_taskwait_finished(gtid, obj);

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
                                          const ident_t *);
#else
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
                                              const ident_t *);
#else
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name(int gtid);

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);

// -----------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.

#if USE_ITT_NOTIFY

/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
*/
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary instructions
// to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority rather randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the primary thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of dynamically scheduled loop.
// DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of the prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   just happen before the release event. */

#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))

/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if waiting time is small). So, in spin
   loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
   spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr) {                                               \
    if (obj == NULL) {                                                         \
      obj = spin;                                                              \
    } /* if */                                                                 \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)obj);                                        \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)obj);                                         \
    } /* if */                                                                 \
  } while (0)

/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
       KMP_ITT_IGNORE(
           ptr = malloc( size );
       );
*/
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)

// Maximum number of frame domains to use (maps to
// different OpenMP regions in the user source code).
const int KMP_MAX_FRAME_DOMAINS = 997;
typedef struct kmp_itthash_entry {
  ident_t *loc;
  int team_size;
  __itt_domain *d;
  struct kmp_itthash_entry *next_in_bucket;
} kmp_itthash_entry_t;
typedef struct kmp_itthash {
  kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS];
  int count; // just a heuristic to limit number of entries
} kmp_itthash_t;
extern kmp_itthash_t __kmp_itt_region_domains;
extern kmp_itthash_t __kmp_itt_barrier_domains;
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#endif // USE_ITT_NOTIFY

#if !KMP_DEBUG
// In release mode include definitions of inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */