Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_collapse.h
35258 views
/*1* kmp_collapse.h -- header for loop collapse feature2*/34//===----------------------------------------------------------------------===//5//6// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.7// See https://llvm.org/LICENSE.txt for license information.8// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception9//10//===----------------------------------------------------------------------===//1112#ifndef KMP_COLLAPSE_H13#define KMP_COLLAPSE_H1415#include <type_traits>1617// Type of the index into the loop nest structures18// (with values from 0 to less than n from collapse(n))19typedef kmp_int32 kmp_index_t;2021// Type for combined loop nest space IV:22typedef kmp_uint64 kmp_loop_nest_iv_t;2324// Loop has <, <=, etc. as a comparison:25enum comparison_t : kmp_int32 {26comp_less_or_eq = 0,27comp_greater_or_eq = 1,28comp_not_eq = 2,29comp_less = 3,30comp_greater = 431};3233// Type of loop IV.34// Type of bounds and step, after usual promotions35// are a subset of these types (32 & 64 only):36enum loop_type_t : kmp_int32 {37loop_type_uint8 = 0,38loop_type_int8 = 1,39loop_type_uint16 = 2,40loop_type_int16 = 3,41loop_type_uint32 = 4,42loop_type_int32 = 5,43loop_type_uint64 = 6,44loop_type_int64 = 745};4647// Defining loop types to handle special cases48enum nested_loop_type_t : kmp_int32 {49nested_loop_type_unkown = 0,50nested_loop_type_lower_triangular_matrix = 1,51nested_loop_type_upper_triangular_matrix = 252};5354/*!55@ingroup WORK_SHARING56* Describes the structure for rectangular nested loops.57*/58template <typename T> struct bounds_infoXX_template {5960// typedef typename traits_t<T>::unsigned_t UT;61typedef typename traits_t<T>::signed_t ST;6263loop_type_t loop_type; // The differentiator64loop_type_t loop_iv_type;65comparison_t comparison;66// outer_iv should be 0 (or any other less then number of dimentions)67// if loop doesn't depend on it (lb1 and ub1 will be 0).68// This way we can do multiplication without a check.69kmp_index_t outer_iv;7071// unions to keep the size constant:72union {73T lb0;74kmp_uint64 lb0_u64; // real type can be signed75};7677union {78T lb1;79kmp_uint64 lb1_u64; // real type can be signed80};8182union {83T ub0;84kmp_uint64 ub0_u64; // real type can be signed85};8687union {88T ub1;89kmp_uint64 ub1_u64; // real type can be signed90};9192union {93ST step; // signed even if bounds type is unsigned94kmp_int64 step_64; // signed95};9697kmp_loop_nest_iv_t trip_count;98};99100/*!101@ingroup WORK_SHARING102* Interface struct for rectangular nested loops.103* Same size as bounds_infoXX_template.104*/105struct bounds_info_t {106107loop_type_t loop_type; // The differentiator108loop_type_t loop_iv_type;109comparison_t comparison;110// outer_iv should be 0 (or any other less then number of dimentions)111// if loop doesn't depend on it (lb1 and ub1 will be 0).112// This way we can do multiplication without a check.113kmp_index_t outer_iv;114115kmp_uint64 lb0_u64; // real type can be signed116kmp_uint64 lb1_u64; // real type can be signed117kmp_uint64 ub0_u64; // real type can be signed118kmp_uint64 ub1_u64; // real type can be signed119kmp_int64 step_64; // signed120121// This is internal, but it's the only internal thing we need122// in rectangular case, so let's expose it here:123kmp_loop_nest_iv_t trip_count;124};125126//-------------------------------------------------------------------------127// Additional types for internal representation:128129// Array for a point in the loop space, in the original space.130// It's represented in kmp_uint64, but each dimention is calculated in131// that loop IV type. Also dimentions have to be converted to those types132// when used in generated code.133typedef kmp_uint64 *kmp_point_t;134135// Array: Number of loop iterations on each nesting level to achieve some point,136// in expanded space or in original space.137// OMPTODO: move from using iterations to using offsets (iterations multiplied138// by steps). For those we need to be careful with the types, as step can be139// negative, but it'll remove multiplications and divisions in several places.140typedef kmp_loop_nest_iv_t *kmp_iterations_t;141142// Internal struct with additional info:143template <typename T> struct bounds_info_internalXX_template {144145// OMPTODO: should span have type T or should it better be146// kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than147// updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to148// use big_span_t, if it can be resolved at compile time.149typedef150typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>151big_span_t;152153// typedef typename big_span_t span_t;154typedef T span_t;155156bounds_infoXX_template<T> b; // possibly adjusted bounds157158// Leaving this as a union in case we'll switch to span_t with different sizes159// (depending on T)160union {161// Smallest possible value of iv (may be smaller than actually possible)162span_t span_smallest;163kmp_uint64 span_smallest_u64;164};165166// Leaving this as a union in case we'll switch to span_t with different sizes167// (depending on T)168union {169// Biggest possible value of iv (may be bigger than actually possible)170span_t span_biggest;171kmp_uint64 span_biggest_u64;172};173174// Did we adjust loop bounds (not counting canonicalization)?175bool loop_bounds_adjusted;176};177178// Internal struct with additional info:179struct bounds_info_internal_t {180181bounds_info_t b; // possibly adjusted bounds182183// Smallest possible value of iv (may be smaller than actually possible)184kmp_uint64 span_smallest_u64;185186// Biggest possible value of iv (may be bigger than actually possible)187kmp_uint64 span_biggest_u64;188189// Did we adjust loop bounds (not counting canonicalization)?190bool loop_bounds_adjusted;191};192193//----------APIs for rectangular loop nests--------------------------------194195// Canonicalize loop nest and calculate overall trip count.196// "bounds_nest" has to be allocated per thread.197// API will modify original bounds_nest array to bring it to a canonical form198// (only <= and >=, no !=, <, >). If the original loop nest was already in a199// canonical form there will be no changes to bounds in bounds_nest array200// (only trip counts will be calculated).201// Returns trip count of overall space.202extern "C" kmp_loop_nest_iv_t203__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,204/*in/out*/ bounds_info_t *original_bounds_nest,205kmp_index_t n);206207// Calculate old induction variables corresponding to overall new_iv.208// Note: original IV will be returned as if it had kmp_uint64 type,209// will have to be converted to original type in user code.210// Note: trip counts should be already calculated by211// __kmpc_process_loop_nest_rectang.212// OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline213// that into user code.214extern "C" void215__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,216const bounds_info_t *original_bounds_nest,217/*out*/ kmp_uint64 *original_ivs,218kmp_index_t n);219220//----------Init API for non-rectangular loops--------------------------------221222// Init API for collapsed loops (static, no chunks defined).223// "bounds_nest" has to be allocated per thread.224// API will modify original bounds_nest array to bring it to a canonical form225// (only <= and >=, no !=, <, >). If the original loop nest was already in a226// canonical form there will be no changes to bounds in bounds_nest array227// (only trip counts will be calculated). Internally API will expand the space228// to parallelogram/parallelepiped, calculate total, calculate bounds for the229// chunks in terms of the new IV, re-calc them in terms of old IVs (especially230// important on the left side, to hit the lower bounds and not step over), and231// pick the correct chunk for this thread (so it will calculate chunks up to the232// needed one). It could be optimized to calculate just this chunk, potentially233// a bit less well distributed among threads. It is designed to make sure that234// threads will receive predictable chunks, deterministically (so that next nest235// of loops with similar characteristics will get exactly same chunks on same236// threads).237// Current contract: chunk_bounds_nest has only lb0 and ub0,238// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).239extern "C" kmp_int32240__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,241/*in/out*/ bounds_info_t *original_bounds_nest,242/*out*/ bounds_info_t *chunk_bounds_nest,243kmp_index_t n,244/*out*/ kmp_int32 *plastiter);245246#endif // KMP_COLLAPSE_H247248249