CoCalc -- kmp_collapse.h

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_collapse.h
³⁵²⁵⁸ views
/*
 * kmp_collapse.h -- header for loop collapse feature
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
12

13
#ifndef KMP_COLLAPSE_H
14
#define KMP_COLLAPSE_H
15

16
#include <type_traits>
17

18
// Type of the index into the loop nest structures
19
// (with values from 0 to less than n from collapse(n))
20
typedef kmp_int32 kmp_index_t;
21

22
// Type for combined loop nest space IV:
23
typedef kmp_uint64 kmp_loop_nest_iv_t;
24

25
// Loop has <, <=, etc. as a comparison:
26
enum comparison_t : kmp_int32 {
27
  comp_less_or_eq = 0,
28
  comp_greater_or_eq = 1,
29
  comp_not_eq = 2,
30
  comp_less = 3,
31
  comp_greater = 4
32
};
33

34
// Type of loop IV.
35
// Type of bounds and step, after usual promotions
36
// are a subset of these types (32 & 64 only):
37
enum loop_type_t : kmp_int32 {
38
  loop_type_uint8 = 0,
39
  loop_type_int8 = 1,
40
  loop_type_uint16 = 2,
41
  loop_type_int16 = 3,
42
  loop_type_uint32 = 4,
43
  loop_type_int32 = 5,
44
  loop_type_uint64 = 6,
45
  loop_type_int64 = 7
46
};
47

48
// Defining loop types to handle special cases
49
enum nested_loop_type_t : kmp_int32 {
50
  nested_loop_type_unkown = 0,
51
  nested_loop_type_lower_triangular_matrix = 1,
52
  nested_loop_type_upper_triangular_matrix = 2
53
};
54

55
/*!
56
 @ingroup WORK_SHARING
57
 * Describes the structure for rectangular nested loops.
58
 */
59
template <typename T> struct bounds_infoXX_template {
60

61
  // typedef typename traits_t<T>::unsigned_t UT;
62
  typedef typename traits_t<T>::signed_t ST;
63

64
  loop_type_t loop_type; // The differentiator
65
  loop_type_t loop_iv_type;
66
  comparison_t comparison;
67
  // outer_iv should be 0 (or any other less then number of dimentions)
68
  // if loop doesn't depend on it (lb1 and ub1 will be 0).
69
  // This way we can do multiplication without a check.
70
  kmp_index_t outer_iv;
71

72
  // unions to keep the size constant:
73
  union {
74
    T lb0;
75
    kmp_uint64 lb0_u64; // real type can be signed
76
  };
77

78
  union {
79
    T lb1;
80
    kmp_uint64 lb1_u64; // real type can be signed
81
  };
82

83
  union {
84
    T ub0;
85
    kmp_uint64 ub0_u64; // real type can be signed
86
  };
87

88
  union {
89
    T ub1;
90
    kmp_uint64 ub1_u64; // real type can be signed
91
  };
92

93
  union {
94
    ST step; // signed even if bounds type is unsigned
95
    kmp_int64 step_64; // signed
96
  };
97

98
  kmp_loop_nest_iv_t trip_count;
99
};
100

101
/*!
102
 @ingroup WORK_SHARING
103
 * Interface struct for rectangular nested loops.
104
 * Same size as bounds_infoXX_template.
105
 */
106
struct bounds_info_t {
107

108
  loop_type_t loop_type; // The differentiator
109
  loop_type_t loop_iv_type;
110
  comparison_t comparison;
111
  // outer_iv should be 0  (or any other less then number of dimentions)
112
  // if loop doesn't depend on it (lb1 and ub1 will be 0).
113
  // This way we can do multiplication without a check.
114
  kmp_index_t outer_iv;
115

116
  kmp_uint64 lb0_u64; // real type can be signed
117
  kmp_uint64 lb1_u64; // real type can be signed
118
  kmp_uint64 ub0_u64; // real type can be signed
119
  kmp_uint64 ub1_u64; // real type can be signed
120
  kmp_int64 step_64; // signed
121

122
  // This is internal, but it's the only internal thing we need
123
  // in rectangular case, so let's expose it here:
124
  kmp_loop_nest_iv_t trip_count;
125
};
126

127
//-------------------------------------------------------------------------
128
// Additional types for internal representation:
129

130
// Array for a point in the loop space, in the original space.
131
// It's represented in kmp_uint64, but each dimention is calculated in
132
// that loop IV type. Also dimentions have to be converted to those types
133
// when used in generated code.
134
typedef kmp_uint64 *kmp_point_t;
135

136
// Array: Number of loop iterations on each nesting level to achieve some point,
137
// in expanded space or in original space.
138
// OMPTODO: move from using iterations to using offsets (iterations multiplied
139
// by steps). For those we need to be careful with the types, as step can be
140
// negative, but it'll remove multiplications and divisions in several places.
141
typedef kmp_loop_nest_iv_t *kmp_iterations_t;
142

143
// Internal struct with additional info:
144
template <typename T> struct bounds_info_internalXX_template {
145

146
  // OMPTODO: should span have type T or should it better be
147
  // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
148
  // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
149
  // use big_span_t, if it can be resolved at compile time.
150
  typedef
151
      typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
152
          big_span_t;
153

154
  // typedef typename big_span_t span_t;
155
  typedef T span_t;
156

157
  bounds_infoXX_template<T> b; // possibly adjusted bounds
158

159
  // Leaving this as a union in case we'll switch to span_t with different sizes
160
  // (depending on T)
161
  union {
162
    // Smallest possible value of iv (may be smaller than actually possible)
163
    span_t span_smallest;
164
    kmp_uint64 span_smallest_u64;
165
  };
166

167
  // Leaving this as a union in case we'll switch to span_t with different sizes
168
  // (depending on T)
169
  union {
170
    // Biggest possible value of iv (may be bigger than actually possible)
171
    span_t span_biggest;
172
    kmp_uint64 span_biggest_u64;
173
  };
174

175
  // Did we adjust loop bounds (not counting canonicalization)?
176
  bool loop_bounds_adjusted;
177
};
178

179
// Internal struct with additional info:
180
struct bounds_info_internal_t {
181

182
  bounds_info_t b; // possibly adjusted bounds
183

184
  // Smallest possible value of iv (may be smaller than actually possible)
185
  kmp_uint64 span_smallest_u64;
186

187
  // Biggest possible value of iv (may be bigger than actually possible)
188
  kmp_uint64 span_biggest_u64;
189

190
  // Did we adjust loop bounds (not counting canonicalization)?
191
  bool loop_bounds_adjusted;
192
};
193

194
//----------APIs for rectangular loop nests--------------------------------
195

196
// Canonicalize loop nest and calculate overall trip count.
197
// "bounds_nest" has to be allocated per thread.
198
// API will modify original bounds_nest array to bring it to a canonical form
199
// (only <= and >=, no !=, <, >). If the original loop nest was already in a
200
// canonical form there will be no changes to bounds in bounds_nest array
201
// (only trip counts will be calculated).
202
// Returns trip count of overall space.
203
extern "C" kmp_loop_nest_iv_t
204
__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
205
                                 /*in/out*/ bounds_info_t *original_bounds_nest,
206
                                 kmp_index_t n);
207

208
// Calculate old induction variables corresponding to overall new_iv.
209
// Note: original IV will be returned as if it had kmp_uint64 type,
210
// will have to be converted to original type in user code.
211
// Note: trip counts should be already calculated by
212
// __kmpc_process_loop_nest_rectang.
213
// OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
214
// that into user code.
215
extern "C" void
216
__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
217
                                 const bounds_info_t *original_bounds_nest,
218
                                 /*out*/ kmp_uint64 *original_ivs,
219
                                 kmp_index_t n);
220

221
//----------Init API for non-rectangular loops--------------------------------
222

223
// Init API for collapsed loops (static, no chunks defined).
224
// "bounds_nest" has to be allocated per thread.
225
// API will modify original bounds_nest array to bring it to a canonical form
226
// (only <= and >=, no !=, <, >). If the original loop nest was already in a
227
// canonical form there will be no changes to bounds in bounds_nest array
228
// (only trip counts will be calculated). Internally API will expand the space
229
// to parallelogram/parallelepiped, calculate total, calculate bounds for the
230
// chunks in terms of the new IV, re-calc them in terms of old IVs (especially
231
// important on the left side, to hit the lower bounds and not step over), and
232
// pick the correct chunk for this thread (so it will calculate chunks up to the
233
// needed one). It could be optimized to calculate just this chunk, potentially
234
// a bit less well distributed among threads. It is designed to make sure that
235
// threads will receive predictable chunks, deterministically (so that next nest
236
// of loops with similar characteristics will get exactly same chunks on same
237
// threads).
238
// Current contract: chunk_bounds_nest has only lb0 and ub0,
239
// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
240
extern "C" kmp_int32
241
__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
242
                          /*in/out*/ bounds_info_t *original_bounds_nest,
243
                          /*out*/ bounds_info_t *chunk_bounds_nest,
244
                          kmp_index_t n,
245
                          /*out*/ kmp_int32 *plastiter);
246

247
#endif // KMP_COLLAPSE_H
248

249
Product

Resources

Company