GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_atomic.cpp
1
/*
2
* kmp_atomic.cpp -- ATOMIC implementation routines
3
*/
4
5
//===----------------------------------------------------------------------===//
6
//
7
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8
// See https://llvm.org/LICENSE.txt for license information.
9
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "kmp_atomic.h"
14
#include "kmp.h" // TRUE, asm routines prototypes
15
16
typedef unsigned char uchar;
17
typedef unsigned short ushort;
18
19
/*!
20
@defgroup ATOMIC_OPS Atomic Operations
21
These functions are used for implementing the many different varieties of atomic
22
operations.
23
24
The compiler is at liberty to inline atomic operations that are naturally
25
supported by the target architecture. For instance on IA-32 architecture an
26
atomic like this can be inlined
27
@code
28
static int s = 0;
29
#pragma omp atomic
30
s++;
31
@endcode
32
using the single instruction: `lock; incl s`
33
34
However the runtime does provide entrypoints for these operations to support
35
compilers that choose not to inline them. (For instance,
36
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
37
38
The names of the functions are encoded by using the data type name and the
39
operation name, as in these tables.
40
41
Data Type | Data type encoding
-----------|---------------
int8_t | `fixed1`
uint8_t | `fixed1u`
int16_t | `fixed2`
uint16_t | `fixed2u`
int32_t | `fixed4`
uint32_t | `fixed4u`
int64_t | `fixed8`
uint64_t | `fixed8u`
float | `float4`
double | `float8`
long double (80-bit x87 extended float) | `float10`
complex<float> | `cmplx4`
complex<double> | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
78
For non-commutative operations, `_rev` can also be added for the reversed
79
operation. For the functions that capture the result, the suffix `_cpt` is
80
added.
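
For example, combining the two tables and the suffix rules above (an
illustrative reading of the naming scheme; both names appear in the full list
below):
@code
__kmpc_atomic_float8_sub_rev  // double operand, reversed subtraction: *lhs = rhs - *lhs
__kmpc_atomic_fixed4_add_cpt  // int32_t operand, addition, result captured
@endcode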

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
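
For example, a compiler that chooses not to inline the earlier `s++` example
could lower it to a call such as the following (a sketch only; `loc` and `gtid`
stand for the source-location descriptor and global thread id the compiler
already has available, and `int` is assumed to map to `fixed4`):
@code
static kmp_int32 s = 0;
// #pragma omp atomic
// s++;
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode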

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the operation or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param id_ref a pointer to the source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
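
For example (a sketch; `loc`, `gtid` and the shared `kmp_int32 s` are assumed
as in the update example above):
@code
kmp_int32 v;
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 1); // s += 5, v is the new value
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 0); // v is the old value, then s += 5
@endcode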

The one exception is the `complex<float>` type, where the value is not
returned; instead, a pointer for the result is passed as an extra argument.
114
115
They look like
116
@code
117
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
118
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119
@endcode
120
121
Read and Write Operations
122
=========================
123
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124
ensure that the value is read or written atomically, with no modification
125
performed. In many cases on IA-32 architecture these operations can be inlined
126
since the architecture guarantees that no tearing occurs on aligned objects
127
accessed with a single memory operation of up to 64 bits in size.
128
129
The general form of the read operations is
130
@code
131
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132
@endcode
133
134
For the write operations the form is
135
@code
136
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137
);
138
@endcode
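
For example (a sketch; `loc` and `gtid` as above, `x` a shared `double`):
@code
double d = __kmpc_atomic_float8_rd(&loc, gtid, &x); // atomic read of x
__kmpc_atomic_float8_wr(&loc, gtid, &x, 3.0);       // atomic write: x = 3.0
@endcode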
139
140
Full list of functions
141
======================
142
This leads to the generation of 376 atomic functions, as follows.
143
144
Functions for integers
145
---------------------
146
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148
@code
149
__kmpc_atomic_fixed1_add
150
__kmpc_atomic_fixed1_add_cpt
151
__kmpc_atomic_fixed1_add_fp
152
__kmpc_atomic_fixed1_andb
153
__kmpc_atomic_fixed1_andb_cpt
154
__kmpc_atomic_fixed1_andl
155
__kmpc_atomic_fixed1_andl_cpt
156
__kmpc_atomic_fixed1_div
157
__kmpc_atomic_fixed1_div_cpt
158
__kmpc_atomic_fixed1_div_cpt_rev
159
__kmpc_atomic_fixed1_div_float8
160
__kmpc_atomic_fixed1_div_fp
161
__kmpc_atomic_fixed1_div_rev
162
__kmpc_atomic_fixed1_eqv
163
__kmpc_atomic_fixed1_eqv_cpt
164
__kmpc_atomic_fixed1_max
165
__kmpc_atomic_fixed1_max_cpt
166
__kmpc_atomic_fixed1_min
167
__kmpc_atomic_fixed1_min_cpt
168
__kmpc_atomic_fixed1_mul
169
__kmpc_atomic_fixed1_mul_cpt
170
__kmpc_atomic_fixed1_mul_float8
171
__kmpc_atomic_fixed1_mul_fp
172
__kmpc_atomic_fixed1_neqv
173
__kmpc_atomic_fixed1_neqv_cpt
174
__kmpc_atomic_fixed1_orb
175
__kmpc_atomic_fixed1_orb_cpt
176
__kmpc_atomic_fixed1_orl
177
__kmpc_atomic_fixed1_orl_cpt
178
__kmpc_atomic_fixed1_rd
179
__kmpc_atomic_fixed1_shl
180
__kmpc_atomic_fixed1_shl_cpt
181
__kmpc_atomic_fixed1_shl_cpt_rev
182
__kmpc_atomic_fixed1_shl_rev
183
__kmpc_atomic_fixed1_shr
184
__kmpc_atomic_fixed1_shr_cpt
185
__kmpc_atomic_fixed1_shr_cpt_rev
186
__kmpc_atomic_fixed1_shr_rev
187
__kmpc_atomic_fixed1_sub
188
__kmpc_atomic_fixed1_sub_cpt
189
__kmpc_atomic_fixed1_sub_cpt_rev
190
__kmpc_atomic_fixed1_sub_fp
191
__kmpc_atomic_fixed1_sub_rev
192
__kmpc_atomic_fixed1_swp
193
__kmpc_atomic_fixed1_wr
194
__kmpc_atomic_fixed1_xor
195
__kmpc_atomic_fixed1_xor_cpt
196
__kmpc_atomic_fixed1u_add_fp
197
__kmpc_atomic_fixed1u_sub_fp
198
__kmpc_atomic_fixed1u_mul_fp
199
__kmpc_atomic_fixed1u_div
200
__kmpc_atomic_fixed1u_div_cpt
201
__kmpc_atomic_fixed1u_div_cpt_rev
202
__kmpc_atomic_fixed1u_div_fp
203
__kmpc_atomic_fixed1u_div_rev
204
__kmpc_atomic_fixed1u_shr
205
__kmpc_atomic_fixed1u_shr_cpt
206
__kmpc_atomic_fixed1u_shr_cpt_rev
207
__kmpc_atomic_fixed1u_shr_rev
208
__kmpc_atomic_fixed2_add
209
__kmpc_atomic_fixed2_add_cpt
210
__kmpc_atomic_fixed2_add_fp
211
__kmpc_atomic_fixed2_andb
212
__kmpc_atomic_fixed2_andb_cpt
213
__kmpc_atomic_fixed2_andl
214
__kmpc_atomic_fixed2_andl_cpt
215
__kmpc_atomic_fixed2_div
216
__kmpc_atomic_fixed2_div_cpt
217
__kmpc_atomic_fixed2_div_cpt_rev
218
__kmpc_atomic_fixed2_div_float8
219
__kmpc_atomic_fixed2_div_fp
220
__kmpc_atomic_fixed2_div_rev
221
__kmpc_atomic_fixed2_eqv
222
__kmpc_atomic_fixed2_eqv_cpt
223
__kmpc_atomic_fixed2_max
224
__kmpc_atomic_fixed2_max_cpt
225
__kmpc_atomic_fixed2_min
226
__kmpc_atomic_fixed2_min_cpt
227
__kmpc_atomic_fixed2_mul
228
__kmpc_atomic_fixed2_mul_cpt
229
__kmpc_atomic_fixed2_mul_float8
230
__kmpc_atomic_fixed2_mul_fp
231
__kmpc_atomic_fixed2_neqv
232
__kmpc_atomic_fixed2_neqv_cpt
233
__kmpc_atomic_fixed2_orb
234
__kmpc_atomic_fixed2_orb_cpt
235
__kmpc_atomic_fixed2_orl
236
__kmpc_atomic_fixed2_orl_cpt
237
__kmpc_atomic_fixed2_rd
238
__kmpc_atomic_fixed2_shl
239
__kmpc_atomic_fixed2_shl_cpt
240
__kmpc_atomic_fixed2_shl_cpt_rev
241
__kmpc_atomic_fixed2_shl_rev
242
__kmpc_atomic_fixed2_shr
243
__kmpc_atomic_fixed2_shr_cpt
244
__kmpc_atomic_fixed2_shr_cpt_rev
245
__kmpc_atomic_fixed2_shr_rev
246
__kmpc_atomic_fixed2_sub
247
__kmpc_atomic_fixed2_sub_cpt
248
__kmpc_atomic_fixed2_sub_cpt_rev
249
__kmpc_atomic_fixed2_sub_fp
250
__kmpc_atomic_fixed2_sub_rev
251
__kmpc_atomic_fixed2_swp
252
__kmpc_atomic_fixed2_wr
253
__kmpc_atomic_fixed2_xor
254
__kmpc_atomic_fixed2_xor_cpt
255
__kmpc_atomic_fixed2u_add_fp
256
__kmpc_atomic_fixed2u_sub_fp
257
__kmpc_atomic_fixed2u_mul_fp
258
__kmpc_atomic_fixed2u_div
259
__kmpc_atomic_fixed2u_div_cpt
260
__kmpc_atomic_fixed2u_div_cpt_rev
261
__kmpc_atomic_fixed2u_div_fp
262
__kmpc_atomic_fixed2u_div_rev
263
__kmpc_atomic_fixed2u_shr
264
__kmpc_atomic_fixed2u_shr_cpt
265
__kmpc_atomic_fixed2u_shr_cpt_rev
266
__kmpc_atomic_fixed2u_shr_rev
267
__kmpc_atomic_fixed4_add
268
__kmpc_atomic_fixed4_add_cpt
269
__kmpc_atomic_fixed4_add_fp
270
__kmpc_atomic_fixed4_andb
271
__kmpc_atomic_fixed4_andb_cpt
272
__kmpc_atomic_fixed4_andl
273
__kmpc_atomic_fixed4_andl_cpt
274
__kmpc_atomic_fixed4_div
275
__kmpc_atomic_fixed4_div_cpt
276
__kmpc_atomic_fixed4_div_cpt_rev
277
__kmpc_atomic_fixed4_div_float8
278
__kmpc_atomic_fixed4_div_fp
279
__kmpc_atomic_fixed4_div_rev
280
__kmpc_atomic_fixed4_eqv
281
__kmpc_atomic_fixed4_eqv_cpt
282
__kmpc_atomic_fixed4_max
283
__kmpc_atomic_fixed4_max_cpt
284
__kmpc_atomic_fixed4_min
285
__kmpc_atomic_fixed4_min_cpt
286
__kmpc_atomic_fixed4_mul
287
__kmpc_atomic_fixed4_mul_cpt
288
__kmpc_atomic_fixed4_mul_float8
289
__kmpc_atomic_fixed4_mul_fp
290
__kmpc_atomic_fixed4_neqv
291
__kmpc_atomic_fixed4_neqv_cpt
292
__kmpc_atomic_fixed4_orb
293
__kmpc_atomic_fixed4_orb_cpt
294
__kmpc_atomic_fixed4_orl
295
__kmpc_atomic_fixed4_orl_cpt
296
__kmpc_atomic_fixed4_rd
297
__kmpc_atomic_fixed4_shl
298
__kmpc_atomic_fixed4_shl_cpt
299
__kmpc_atomic_fixed4_shl_cpt_rev
300
__kmpc_atomic_fixed4_shl_rev
301
__kmpc_atomic_fixed4_shr
302
__kmpc_atomic_fixed4_shr_cpt
303
__kmpc_atomic_fixed4_shr_cpt_rev
304
__kmpc_atomic_fixed4_shr_rev
305
__kmpc_atomic_fixed4_sub
306
__kmpc_atomic_fixed4_sub_cpt
307
__kmpc_atomic_fixed4_sub_cpt_rev
308
__kmpc_atomic_fixed4_sub_fp
309
__kmpc_atomic_fixed4_sub_rev
310
__kmpc_atomic_fixed4_swp
311
__kmpc_atomic_fixed4_wr
312
__kmpc_atomic_fixed4_xor
313
__kmpc_atomic_fixed4_xor_cpt
314
__kmpc_atomic_fixed4u_add_fp
315
__kmpc_atomic_fixed4u_sub_fp
316
__kmpc_atomic_fixed4u_mul_fp
317
__kmpc_atomic_fixed4u_div
318
__kmpc_atomic_fixed4u_div_cpt
319
__kmpc_atomic_fixed4u_div_cpt_rev
320
__kmpc_atomic_fixed4u_div_fp
321
__kmpc_atomic_fixed4u_div_rev
322
__kmpc_atomic_fixed4u_shr
323
__kmpc_atomic_fixed4u_shr_cpt
324
__kmpc_atomic_fixed4u_shr_cpt_rev
325
__kmpc_atomic_fixed4u_shr_rev
326
__kmpc_atomic_fixed8_add
327
__kmpc_atomic_fixed8_add_cpt
328
__kmpc_atomic_fixed8_add_fp
329
__kmpc_atomic_fixed8_andb
330
__kmpc_atomic_fixed8_andb_cpt
331
__kmpc_atomic_fixed8_andl
332
__kmpc_atomic_fixed8_andl_cpt
333
__kmpc_atomic_fixed8_div
334
__kmpc_atomic_fixed8_div_cpt
335
__kmpc_atomic_fixed8_div_cpt_rev
336
__kmpc_atomic_fixed8_div_float8
337
__kmpc_atomic_fixed8_div_fp
338
__kmpc_atomic_fixed8_div_rev
339
__kmpc_atomic_fixed8_eqv
340
__kmpc_atomic_fixed8_eqv_cpt
341
__kmpc_atomic_fixed8_max
342
__kmpc_atomic_fixed8_max_cpt
343
__kmpc_atomic_fixed8_min
344
__kmpc_atomic_fixed8_min_cpt
345
__kmpc_atomic_fixed8_mul
346
__kmpc_atomic_fixed8_mul_cpt
347
__kmpc_atomic_fixed8_mul_float8
348
__kmpc_atomic_fixed8_mul_fp
349
__kmpc_atomic_fixed8_neqv
350
__kmpc_atomic_fixed8_neqv_cpt
351
__kmpc_atomic_fixed8_orb
352
__kmpc_atomic_fixed8_orb_cpt
353
__kmpc_atomic_fixed8_orl
354
__kmpc_atomic_fixed8_orl_cpt
355
__kmpc_atomic_fixed8_rd
356
__kmpc_atomic_fixed8_shl
357
__kmpc_atomic_fixed8_shl_cpt
358
__kmpc_atomic_fixed8_shl_cpt_rev
359
__kmpc_atomic_fixed8_shl_rev
360
__kmpc_atomic_fixed8_shr
361
__kmpc_atomic_fixed8_shr_cpt
362
__kmpc_atomic_fixed8_shr_cpt_rev
363
__kmpc_atomic_fixed8_shr_rev
364
__kmpc_atomic_fixed8_sub
365
__kmpc_atomic_fixed8_sub_cpt
366
__kmpc_atomic_fixed8_sub_cpt_rev
367
__kmpc_atomic_fixed8_sub_fp
368
__kmpc_atomic_fixed8_sub_rev
369
__kmpc_atomic_fixed8_swp
370
__kmpc_atomic_fixed8_wr
371
__kmpc_atomic_fixed8_xor
372
__kmpc_atomic_fixed8_xor_cpt
373
__kmpc_atomic_fixed8u_add_fp
374
__kmpc_atomic_fixed8u_sub_fp
375
__kmpc_atomic_fixed8u_mul_fp
376
__kmpc_atomic_fixed8u_div
377
__kmpc_atomic_fixed8u_div_cpt
378
__kmpc_atomic_fixed8u_div_cpt_rev
379
__kmpc_atomic_fixed8u_div_fp
380
__kmpc_atomic_fixed8u_div_rev
381
__kmpc_atomic_fixed8u_shr
382
__kmpc_atomic_fixed8u_shr_cpt
383
__kmpc_atomic_fixed8u_shr_cpt_rev
384
__kmpc_atomic_fixed8u_shr_rev
385
@endcode
386
387
Functions for floating point
388
----------------------------
389
There are versions here for floating point numbers of size 4, 8, 10 and 16
390
bytes. (Ten byte floats are used by X87, but are now rare).
391
@code
392
__kmpc_atomic_float4_add
393
__kmpc_atomic_float4_add_cpt
394
__kmpc_atomic_float4_add_float8
395
__kmpc_atomic_float4_add_fp
396
__kmpc_atomic_float4_div
397
__kmpc_atomic_float4_div_cpt
398
__kmpc_atomic_float4_div_cpt_rev
399
__kmpc_atomic_float4_div_float8
400
__kmpc_atomic_float4_div_fp
401
__kmpc_atomic_float4_div_rev
402
__kmpc_atomic_float4_max
403
__kmpc_atomic_float4_max_cpt
404
__kmpc_atomic_float4_min
405
__kmpc_atomic_float4_min_cpt
406
__kmpc_atomic_float4_mul
407
__kmpc_atomic_float4_mul_cpt
408
__kmpc_atomic_float4_mul_float8
409
__kmpc_atomic_float4_mul_fp
410
__kmpc_atomic_float4_rd
411
__kmpc_atomic_float4_sub
412
__kmpc_atomic_float4_sub_cpt
413
__kmpc_atomic_float4_sub_cpt_rev
414
__kmpc_atomic_float4_sub_float8
415
__kmpc_atomic_float4_sub_fp
416
__kmpc_atomic_float4_sub_rev
417
__kmpc_atomic_float4_swp
418
__kmpc_atomic_float4_wr
419
__kmpc_atomic_float8_add
420
__kmpc_atomic_float8_add_cpt
421
__kmpc_atomic_float8_add_fp
422
__kmpc_atomic_float8_div
423
__kmpc_atomic_float8_div_cpt
424
__kmpc_atomic_float8_div_cpt_rev
425
__kmpc_atomic_float8_div_fp
426
__kmpc_atomic_float8_div_rev
427
__kmpc_atomic_float8_max
428
__kmpc_atomic_float8_max_cpt
429
__kmpc_atomic_float8_min
430
__kmpc_atomic_float8_min_cpt
431
__kmpc_atomic_float8_mul
432
__kmpc_atomic_float8_mul_cpt
433
__kmpc_atomic_float8_mul_fp
434
__kmpc_atomic_float8_rd
435
__kmpc_atomic_float8_sub
436
__kmpc_atomic_float8_sub_cpt
437
__kmpc_atomic_float8_sub_cpt_rev
438
__kmpc_atomic_float8_sub_fp
439
__kmpc_atomic_float8_sub_rev
440
__kmpc_atomic_float8_swp
441
__kmpc_atomic_float8_wr
442
__kmpc_atomic_float10_add
443
__kmpc_atomic_float10_add_cpt
444
__kmpc_atomic_float10_add_fp
445
__kmpc_atomic_float10_div
446
__kmpc_atomic_float10_div_cpt
447
__kmpc_atomic_float10_div_cpt_rev
448
__kmpc_atomic_float10_div_fp
449
__kmpc_atomic_float10_div_rev
450
__kmpc_atomic_float10_mul
451
__kmpc_atomic_float10_mul_cpt
452
__kmpc_atomic_float10_mul_fp
453
__kmpc_atomic_float10_rd
454
__kmpc_atomic_float10_sub
455
__kmpc_atomic_float10_sub_cpt
456
__kmpc_atomic_float10_sub_cpt_rev
457
__kmpc_atomic_float10_sub_fp
458
__kmpc_atomic_float10_sub_rev
459
__kmpc_atomic_float10_swp
460
__kmpc_atomic_float10_wr
461
__kmpc_atomic_float16_add
462
__kmpc_atomic_float16_add_cpt
463
__kmpc_atomic_float16_div
464
__kmpc_atomic_float16_div_cpt
465
__kmpc_atomic_float16_div_cpt_rev
466
__kmpc_atomic_float16_div_rev
467
__kmpc_atomic_float16_max
468
__kmpc_atomic_float16_max_cpt
469
__kmpc_atomic_float16_min
470
__kmpc_atomic_float16_min_cpt
471
__kmpc_atomic_float16_mul
472
__kmpc_atomic_float16_mul_cpt
473
__kmpc_atomic_float16_rd
474
__kmpc_atomic_float16_sub
475
__kmpc_atomic_float16_sub_cpt
476
__kmpc_atomic_float16_sub_cpt_rev
477
__kmpc_atomic_float16_sub_rev
478
__kmpc_atomic_float16_swp
479
__kmpc_atomic_float16_wr
480
@endcode
481
482
Functions for Complex types
483
---------------------------
484
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* a `complex<float>`.
488
489
@code
490
__kmpc_atomic_cmplx4_add
491
__kmpc_atomic_cmplx4_add_cmplx8
492
__kmpc_atomic_cmplx4_add_cpt
493
__kmpc_atomic_cmplx4_div
494
__kmpc_atomic_cmplx4_div_cmplx8
495
__kmpc_atomic_cmplx4_div_cpt
496
__kmpc_atomic_cmplx4_div_cpt_rev
497
__kmpc_atomic_cmplx4_div_rev
498
__kmpc_atomic_cmplx4_mul
499
__kmpc_atomic_cmplx4_mul_cmplx8
500
__kmpc_atomic_cmplx4_mul_cpt
501
__kmpc_atomic_cmplx4_rd
502
__kmpc_atomic_cmplx4_sub
503
__kmpc_atomic_cmplx4_sub_cmplx8
504
__kmpc_atomic_cmplx4_sub_cpt
505
__kmpc_atomic_cmplx4_sub_cpt_rev
506
__kmpc_atomic_cmplx4_sub_rev
507
__kmpc_atomic_cmplx4_swp
508
__kmpc_atomic_cmplx4_wr
509
__kmpc_atomic_cmplx8_add
510
__kmpc_atomic_cmplx8_add_cpt
511
__kmpc_atomic_cmplx8_div
512
__kmpc_atomic_cmplx8_div_cpt
513
__kmpc_atomic_cmplx8_div_cpt_rev
514
__kmpc_atomic_cmplx8_div_rev
515
__kmpc_atomic_cmplx8_mul
516
__kmpc_atomic_cmplx8_mul_cpt
517
__kmpc_atomic_cmplx8_rd
518
__kmpc_atomic_cmplx8_sub
519
__kmpc_atomic_cmplx8_sub_cpt
520
__kmpc_atomic_cmplx8_sub_cpt_rev
521
__kmpc_atomic_cmplx8_sub_rev
522
__kmpc_atomic_cmplx8_swp
523
__kmpc_atomic_cmplx8_wr
524
__kmpc_atomic_cmplx10_add
525
__kmpc_atomic_cmplx10_add_cpt
526
__kmpc_atomic_cmplx10_div
527
__kmpc_atomic_cmplx10_div_cpt
528
__kmpc_atomic_cmplx10_div_cpt_rev
529
__kmpc_atomic_cmplx10_div_rev
530
__kmpc_atomic_cmplx10_mul
531
__kmpc_atomic_cmplx10_mul_cpt
532
__kmpc_atomic_cmplx10_rd
533
__kmpc_atomic_cmplx10_sub
534
__kmpc_atomic_cmplx10_sub_cpt
535
__kmpc_atomic_cmplx10_sub_cpt_rev
536
__kmpc_atomic_cmplx10_sub_rev
537
__kmpc_atomic_cmplx10_swp
538
__kmpc_atomic_cmplx10_wr
539
__kmpc_atomic_cmplx16_add
540
__kmpc_atomic_cmplx16_add_cpt
541
__kmpc_atomic_cmplx16_div
542
__kmpc_atomic_cmplx16_div_cpt
543
__kmpc_atomic_cmplx16_div_cpt_rev
544
__kmpc_atomic_cmplx16_div_rev
545
__kmpc_atomic_cmplx16_mul
546
__kmpc_atomic_cmplx16_mul_cpt
547
__kmpc_atomic_cmplx16_rd
548
__kmpc_atomic_cmplx16_sub
549
__kmpc_atomic_cmplx16_sub_cpt
550
__kmpc_atomic_cmplx16_sub_cpt_rev
551
__kmpc_atomic_cmplx16_swp
552
__kmpc_atomic_cmplx16_wr
553
@endcode
554
*/
555
556
/*!
557
@ingroup ATOMIC_OPS
558
@{
559
*/
560
561
/*
562
* Global vars
563
*/
564
565
#ifndef KMP_GOMP_COMPAT
566
int __kmp_atomic_mode = 1; // Intel perf
567
#else
568
int __kmp_atomic_mode = 2; // GOMP compatibility
569
#endif /* KMP_GOMP_COMPAT */
570
571
KMP_ALIGN(128)
572
573
// Control access to all user coded atomics in Gnu compat mode
574
kmp_atomic_lock_t __kmp_atomic_lock;
575
// Control access to all user coded atomics for 1-byte fixed data types
576
kmp_atomic_lock_t __kmp_atomic_lock_1i;
577
// Control access to all user coded atomics for 2-byte fixed data types
578
kmp_atomic_lock_t __kmp_atomic_lock_2i;
579
// Control access to all user coded atomics for 4-byte fixed data types
580
kmp_atomic_lock_t __kmp_atomic_lock_4i;
581
// Control access to all user coded atomics for kmp_real32 data type
582
kmp_atomic_lock_t __kmp_atomic_lock_4r;
583
// Control access to all user coded atomics for 8-byte fixed data types
584
kmp_atomic_lock_t __kmp_atomic_lock_8i;
585
// Control access to all user coded atomics for kmp_real64 data type
586
kmp_atomic_lock_t __kmp_atomic_lock_8r;
587
// Control access to all user coded atomics for complex byte data type
588
kmp_atomic_lock_t __kmp_atomic_lock_8c;
589
// Control access to all user coded atomics for long double data type
590
kmp_atomic_lock_t __kmp_atomic_lock_10r;
591
// Control access to all user coded atomics for _Quad data type
592
kmp_atomic_lock_t __kmp_atomic_lock_16r;
593
// Control access to all user coded atomics for double complex data type
594
kmp_atomic_lock_t __kmp_atomic_lock_16c;
595
// Control access to all user coded atomics for long double complex type
596
kmp_atomic_lock_t __kmp_atomic_lock_20c;
597
// Control access to all user coded atomics for _Quad complex data type
598
kmp_atomic_lock_t __kmp_atomic_lock_32c;
599
600
/* 2007-03-02:
601
Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602
on *_32 and *_32e. This is just a temporary workaround for the problem. It
603
seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604
in assembler language. */
605
#define KMP_ATOMIC_VOLATILE volatile
606
607
#if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608
609
static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610
return lhs.q + rhs.q;
611
}
612
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613
return lhs.q - rhs.q;
614
}
615
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616
return lhs.q * rhs.q;
617
}
618
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619
return lhs.q / rhs.q;
620
}
621
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622
return lhs.q < rhs.q;
623
}
624
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625
return lhs.q > rhs.q;
626
}
627
628
static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629
return lhs.q + rhs.q;
630
}
631
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632
return lhs.q - rhs.q;
633
}
634
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635
return lhs.q * rhs.q;
636
}
637
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638
return lhs.q / rhs.q;
639
}
640
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641
return lhs.q < rhs.q;
642
}
643
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644
return lhs.q > rhs.q;
645
}
646
647
static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648
kmp_cmplx128_a4_t &rhs) {
649
return lhs.q + rhs.q;
650
}
651
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652
kmp_cmplx128_a4_t &rhs) {
653
return lhs.q - rhs.q;
654
}
655
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656
kmp_cmplx128_a4_t &rhs) {
657
return lhs.q * rhs.q;
658
}
659
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660
kmp_cmplx128_a4_t &rhs) {
661
return lhs.q / rhs.q;
662
}
663
664
static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665
kmp_cmplx128_a16_t &rhs) {
666
return lhs.q + rhs.q;
667
}
668
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669
kmp_cmplx128_a16_t &rhs) {
670
return lhs.q - rhs.q;
671
}
672
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673
kmp_cmplx128_a16_t &rhs) {
674
return lhs.q * rhs.q;
675
}
676
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677
kmp_cmplx128_a16_t &rhs) {
678
return lhs.q / rhs.q;
679
}
680
681
#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682
683
// ATOMIC implementation routines -----------------------------------------
684
// One routine for each operation and operand type.
685
// All routine declarations look like
686
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687
688
#define KMP_CHECK_GTID \
689
if (gtid == KMP_GTID_UNKNOWN) { \
690
gtid = __kmp_entry_gtid(); \
691
} // check and get gtid when needed
692
693
// Beginning of a definition (provides name, parameters, debug trace)
694
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695
// fixed)
696
// OP_ID - operation identifier (add, sub, mul, ...)
697
// TYPE - operands' type
698
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699
RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700
TYPE *lhs, TYPE rhs) { \
701
KMP_DEBUG_ASSERT(__kmp_init_serial); \
702
KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703
704
// ------------------------------------------------------------------------
705
// Lock variables used for critical sections for various size operands
706
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719
720
// ------------------------------------------------------------------------
721
// Operation on *lhs, rhs bound by critical section
722
// OP - operator (it's supposed to contain an assignment)
723
// LCK_ID - lock identifier
724
// Note: don't check gtid as it should always be valid
725
// For 1- and 2-byte operands a valid gtid is expected; for others, check it before this macro
726
#define OP_CRITICAL(OP, LCK_ID) \
727
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728
\
729
(*lhs) OP(rhs); \
730
\
731
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732
733
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735
(*lhs) = (TYPE)((*lhs)OP rhs); \
736
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
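
// Illustrative expansion (a sketch, not generated code): for instance,
// OP_UPDATE_CRITICAL(float, +, 4r) serializes the update under the lock for
// 4-byte reals:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (float)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);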
737
738
// ------------------------------------------------------------------------
739
// For GNU compatibility, we may need to use a critical section,
740
// even though it is not required by the ISA.
741
//
742
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744
// critical section. On Intel(R) 64, all atomic operations are done with fetch
745
// and add or compare and exchange. Therefore, the FLAG parameter to this
746
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747
// require a critical section, where we predict that they will be implemented
748
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749
//
750
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751
// the FLAG parameter should always be 1. If we know that we will be using
752
// a critical section, then we want to make certain that we use the generic
753
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
754
// locks that are specialized based upon the size or type of the data.
755
//
756
// If FLAG is 0, then we are relying on dead code elimination by the build
757
// compiler to get rid of the useless block of code, and save a needless
758
// branch at runtime.
759
760
#ifdef KMP_GOMP_COMPAT
761
#define OP_GOMP_CRITICAL(OP, FLAG) \
762
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763
KMP_CHECK_GTID; \
764
OP_CRITICAL(OP, 0); \
765
return; \
766
}
767
768
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770
KMP_CHECK_GTID; \
771
OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772
return; \
773
}
774
#else
775
#define OP_GOMP_CRITICAL(OP, FLAG)
776
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777
#endif /* KMP_GOMP_COMPAT */
778
779
#if KMP_MIC
780
#define KMP_DO_PAUSE _mm_delay_32(1)
781
#else
782
#define KMP_DO_PAUSE
783
#endif /* KMP_MIC */
784
785
// ------------------------------------------------------------------------
786
// Operation on *lhs, rhs using "compare_and_store" routine
787
// TYPE - operands' type
788
// BITS - size in bits, used to distinguish low level calls
789
// OP - operator
790
#define OP_CMPXCHG(TYPE, BITS, OP) \
791
{ \
792
TYPE old_value, new_value; \
793
old_value = *(TYPE volatile *)lhs; \
794
new_value = (TYPE)(old_value OP rhs); \
795
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798
KMP_DO_PAUSE; \
799
\
800
old_value = *(TYPE volatile *)lhs; \
801
new_value = (TYPE)(old_value OP rhs); \
802
} \
803
}
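
// Illustrative sketch: OP_CMPXCHG(kmp_real32, 32, +) retries a 32-bit
// compare-and-swap until no other thread has modified *lhs between the read
// and the exchange, roughly:
//
//   kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//   kmp_real32 new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, /* old bits */,
//                                       /* new bits */)) {
//     KMP_DO_PAUSE; // lost the race; re-read *lhs and retry
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//   }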
804
805
#if USE_CMPXCHG_FIX
806
// 2007-06-25:
807
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
809
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811
// the workaround.
812
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813
{ \
814
struct _sss { \
815
TYPE cmp; \
816
kmp_int##BITS *vvv; \
817
}; \
818
struct _sss old_value, new_value; \
819
old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820
new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821
*old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822
new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825
*VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826
KMP_DO_PAUSE; \
827
\
828
*old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829
new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830
} \
831
}
832
// end of the first part of the workaround for C78287
833
#endif // USE_CMPXCHG_FIX
834
835
#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
836
// Undo explicit type casts to get MSVC ARM64 to build. Uses
837
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
838
#undef OP_CMPXCHG
839
#define OP_CMPXCHG(TYPE, BITS, OP) \
840
{ \
841
struct _sss { \
842
TYPE cmp; \
843
kmp_int##BITS *vvv; \
844
}; \
845
struct _sss old_value, new_value; \
846
old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847
new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848
*old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849
new_value.cmp = old_value.cmp OP rhs; \
850
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852
*VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853
KMP_DO_PAUSE; \
854
\
855
*old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856
new_value.cmp = old_value.cmp OP rhs; \
857
} \
858
}
859
860
#undef OP_UPDATE_CRITICAL
861
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863
(*lhs) = (*lhs)OP rhs; \
864
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865
866
#endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
867
868
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
869
870
// ------------------------------------------------------------------------
871
// X86 or X86_64: no alignment problems ====================================
872
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873
GOMP_FLAG) \
874
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876
/* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877
KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878
}
879
// -------------------------------------------------------------------------
880
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881
GOMP_FLAG) \
882
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884
OP_CMPXCHG(TYPE, BITS, OP) \
885
}
886
#if USE_CMPXCHG_FIX
887
// -------------------------------------------------------------------------
888
// workaround for C78287 (complex(kind=4) data type)
889
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890
MASK, GOMP_FLAG) \
891
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893
OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894
}
895
// end of the second part of the workaround for C78287
896
#endif // USE_CMPXCHG_FIX
897
898
#else
899
// -------------------------------------------------------------------------
900
// Code for other architectures that don't handle unaligned accesses.
901
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902
GOMP_FLAG) \
903
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906
/* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907
KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908
} else { \
909
KMP_CHECK_GTID; \
910
OP_UPDATE_CRITICAL(TYPE, OP, \
911
LCK_ID) /* unaligned address - use critical */ \
912
} \
913
}
914
// -------------------------------------------------------------------------
915
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916
GOMP_FLAG) \
917
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921
} else { \
922
KMP_CHECK_GTID; \
923
OP_UPDATE_CRITICAL(TYPE, OP, \
924
LCK_ID) /* unaligned address - use critical */ \
925
} \
926
}
927
#if USE_CMPXCHG_FIX
928
// -------------------------------------------------------------------------
929
// workaround for C78287 (complex(kind=4) data type)
930
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931
MASK, GOMP_FLAG) \
932
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936
} else { \
937
KMP_CHECK_GTID; \
938
OP_UPDATE_CRITICAL(TYPE, OP, \
939
LCK_ID) /* unaligned address - use critical */ \
940
} \
941
}
942
// end of the second part of the workaround for C78287
943
#endif // USE_CMPXCHG_FIX
944
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945
946
// Routines for ATOMIC 4-byte operands addition and subtraction
947
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948
0) // __kmpc_atomic_fixed4_add
949
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950
0) // __kmpc_atomic_fixed4_sub
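
// Illustrative expansion (a sketch; KA_TRACE and other details omitted): on
// X86/X86_64 the first instantiation above becomes roughly
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     // GOMP_FLAG is 0 here, so the GOMP-compat critical path compiles away
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // single atomic fetch-and-add
//   }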
951
952
ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953
KMP_ARCH_X86) // __kmpc_atomic_float4_add
954
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955
KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956
957
// Routines for ATOMIC 8-byte operands addition and subtraction
958
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959
KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962
963
ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964
KMP_ARCH_X86) // __kmpc_atomic_float8_add
965
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966
KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967
968
// ------------------------------------------------------------------------
969
// Entries definition for integer operands
970
// TYPE_ID - operands type and size (fixed4, float4)
971
// OP_ID - operation identifier (add, sub, mul, ...)
972
// TYPE - operand type
973
// BITS - size in bits, used to distinguish low level calls
974
// OP - operator (used in critical section)
975
// LCK_ID - lock identifier, used to possibly distinguish lock variable
976
// MASK - used for alignment check
977
978
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979
// ------------------------------------------------------------------------
980
// Routines for ATOMIC integer operands, other operators
981
// ------------------------------------------------------------------------
982
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984
KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986
0) // __kmpc_atomic_fixed1_andb
987
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992
KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994
0) // __kmpc_atomic_fixed1_orb
995
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004
0) // __kmpc_atomic_fixed1_xor
1005
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006
KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008
0) // __kmpc_atomic_fixed2_andb
1009
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014
KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016
0) // __kmpc_atomic_fixed2_orb
1017
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026
0) // __kmpc_atomic_fixed2_xor
1027
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028
0) // __kmpc_atomic_fixed4_andb
1029
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030
KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034
KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036
0) // __kmpc_atomic_fixed4_orb
1037
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044
0) // __kmpc_atomic_fixed4_xor
1045
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046
KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052
KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054
KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062
KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064
KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066
KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068
KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070
KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072
1073
/* ------------------------------------------------------------------------ */
1074
/* Routines for C/C++ Reduction operators && and || */
1075
1076
// ------------------------------------------------------------------------
1077
// Need separate macros for &&, || because there is no combined assignment
1078
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081
OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082
OP_CRITICAL(= *lhs OP, LCK_ID) \
1083
}
1084
1085
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086
1087
// ------------------------------------------------------------------------
1088
// X86 or X86_64: no alignment problems ===================================
1089
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091
OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092
OP_CMPXCHG(TYPE, BITS, OP) \
1093
}
1094
1095
#else
1096
// ------------------------------------------------------------------------
1097
// Code for other architectures that don't handle unaligned accesses.
1098
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100
OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103
} else { \
1104
KMP_CHECK_GTID; \
1105
OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106
} \
1107
}
1108
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109
1110
ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111
KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113
KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115
KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117
KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119
0) // __kmpc_atomic_fixed4_andl
1120
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121
0) // __kmpc_atomic_fixed4_orl
1122
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123
KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125
KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126
1127
/* ------------------------------------------------------------------------- */
1128
/* Routines for Fortran operators that matched no one in C: */
1129
/* MAX, MIN, .EQV., .NEQV. */
1130
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1131
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132
1133
// -------------------------------------------------------------------------
1134
// MIN and MAX need separate macros
1135
// OP - operator to check if we need any actions?
1136
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138
\
1139
if (*lhs OP rhs) { /* still need actions? */ \
1140
*lhs = rhs; \
1141
} \
1142
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143
1144
// -------------------------------------------------------------------------
1145
#ifdef KMP_GOMP_COMPAT
1146
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148
KMP_CHECK_GTID; \
1149
MIN_MAX_CRITSECT(OP, 0); \
1150
return; \
1151
}
1152
#else
1153
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154
#endif /* KMP_GOMP_COMPAT */
1155
1156
// -------------------------------------------------------------------------
1157
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158
{ \
1159
TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160
TYPE old_value; \
1161
temp_val = *lhs; \
1162
old_value = temp_val; \
1163
while (old_value OP rhs && /* still need actions? */ \
1164
!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165
(kmp_int##BITS *)lhs, \
1166
*VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167
*VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168
temp_val = *lhs; \
1169
old_value = temp_val; \
1170
} \
1171
}
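
// Illustrative note: MIN_MAX_CMPXCHG(kmp_real32, 32, <) (used for max below)
// re-reads *lhs and retries the compare-and-swap only while rhs would still
// replace the current value, i.e. while (old_value < rhs); once another thread
// has stored a value >= rhs, the loop exits without writing.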
1172
1173
// -------------------------------------------------------------------------
1174
// 1-byte, 2-byte operands - use critical section
1175
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177
if (*lhs OP rhs) { /* need actions? */ \
1178
GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179
MIN_MAX_CRITSECT(OP, LCK_ID) \
1180
} \
1181
}
1182
1183
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184
1185
// -------------------------------------------------------------------------
1186
// X86 or X86_64: no alignment problems ====================================
1187
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188
GOMP_FLAG) \
1189
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190
if (*lhs OP rhs) { \
1191
GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192
MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193
} \
1194
}
1195
1196
#else
1197
// -------------------------------------------------------------------------
1198
// Code for other architectures that don't handle unaligned accesses.
1199
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200
GOMP_FLAG) \
1201
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202
if (*lhs OP rhs) { \
1203
GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205
MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206
} else { \
1207
KMP_CHECK_GTID; \
1208
MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209
} \
1210
} \
1211
}
1212
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213
1214
MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215
KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217
KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219
KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221
KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223
0) // __kmpc_atomic_fixed4_max
1224
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225
0) // __kmpc_atomic_fixed4_min
1226
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227
KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229
KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231
KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233
KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235
KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237
KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239
MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240
1) // __kmpc_atomic_float10_max
1241
MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242
1) // __kmpc_atomic_float10_min
1243
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244
#if KMP_HAVE_QUAD
1245
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246
1) // __kmpc_atomic_float16_max
1247
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248
1) // __kmpc_atomic_float16_min
1249
#if (KMP_ARCH_X86)
1250
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251
1) // __kmpc_atomic_float16_max_a16
1252
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253
1) // __kmpc_atomic_float16_min_a16
1254
#endif // (KMP_ARCH_X86)
1255
#endif // KMP_HAVE_QUAD
1256
// ------------------------------------------------------------------------
1257
// Need separate macros for .EQV. because of the need of complement (~)
1258
// OP ignored for critical sections, ^=~ used instead
1259
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1260
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1261
OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1262
OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1263
}
1264
1265
// ------------------------------------------------------------------------
1266
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267
// ------------------------------------------------------------------------
1268
// X86 or X86_64: no alignment problems ===================================
1269
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1270
GOMP_FLAG) \
1271
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1272
OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1273
OP_CMPXCHG(TYPE, BITS, OP) \
1274
}
1275
// ------------------------------------------------------------------------
1276
#else
1277
// ------------------------------------------------------------------------
1278
// Code for other architectures that don't handle unaligned accesses.
1279
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1280
GOMP_FLAG) \
1281
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1282
OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1283
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1284
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1285
} else { \
1286
KMP_CHECK_GTID; \
1287
OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1288
} \
1289
}
1290
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291
1292
ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293
KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295
KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297
KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299
KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301
KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303
KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305
KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307
KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1308
1309
// ------------------------------------------------------------------------
1310
// Routines for Extended types: long double, _Quad, complex flavours (use
1311
// critical section)
1312
// TYPE_ID, OP_ID, TYPE - detailed above
1313
// OP - operator
1314
// LCK_ID - lock identifier, used to possibly distinguish lock variable
1315
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1316
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1317
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1318
OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1319
}
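
// Illustrative sketch: ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1)
// (used below) generates __kmpc_atomic_cmplx8_add, which on the default
// (non-GOMP-compat) path performs (*lhs) = (*lhs) + rhs while holding
// __kmp_atomic_lock_16c.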
1320
1321
/* ------------------------------------------------------------------------- */
1322
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323
// routines for long double type
1324
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325
1) // __kmpc_atomic_float10_add
1326
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327
1) // __kmpc_atomic_float10_sub
1328
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329
1) // __kmpc_atomic_float10_mul
1330
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331
1) // __kmpc_atomic_float10_div
1332
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333
#if KMP_HAVE_QUAD
1334
// routines for _Quad type
1335
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336
1) // __kmpc_atomic_float16_add
1337
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338
1) // __kmpc_atomic_float16_sub
1339
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340
1) // __kmpc_atomic_float16_mul
1341
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342
1) // __kmpc_atomic_float16_div
1343
#if (KMP_ARCH_X86)
1344
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345
1) // __kmpc_atomic_float16_add_a16
1346
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347
1) // __kmpc_atomic_float16_sub_a16
1348
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349
1) // __kmpc_atomic_float16_mul_a16
1350
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351
1) // __kmpc_atomic_float16_div_a16
1352
#endif // (KMP_ARCH_X86)
1353
#endif // KMP_HAVE_QUAD
1354
// routines for complex types
1355
1356
#if USE_CMPXCHG_FIX
1357
// workaround for C78287 (complex(kind=4) data type)
1358
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359
1) // __kmpc_atomic_cmplx4_add
1360
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361
1) // __kmpc_atomic_cmplx4_sub
1362
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363
1) // __kmpc_atomic_cmplx4_mul
1364
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365
1) // __kmpc_atomic_cmplx4_div
1366
// end of the workaround for C78287
1367
#else
1368
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372
#endif // USE_CMPXCHG_FIX
1373
1374
ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380
1) // __kmpc_atomic_cmplx10_add
1381
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382
1) // __kmpc_atomic_cmplx10_sub
1383
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384
1) // __kmpc_atomic_cmplx10_mul
1385
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386
1) // __kmpc_atomic_cmplx10_div
1387
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388
#if KMP_HAVE_QUAD
1389
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390
1) // __kmpc_atomic_cmplx16_add
1391
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392
1) // __kmpc_atomic_cmplx16_sub
1393
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394
1) // __kmpc_atomic_cmplx16_mul
1395
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396
1) // __kmpc_atomic_cmplx16_div
1397
#if (KMP_ARCH_X86)
1398
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399
1) // __kmpc_atomic_cmplx16_add_a16
1400
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401
1) // __kmpc_atomic_cmplx16_sub_a16
1402
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403
1) // __kmpc_atomic_cmplx16_mul_a16
1404
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405
1) // __kmpc_atomic_cmplx16_div_a16
1406
#endif // (KMP_ARCH_X86)
1407
#endif // KMP_HAVE_QUAD
1408
1409
// OpenMP 4.0: x = expr binop x for non-commutative operations.
1410
// Supported only on IA-32 architecture and Intel(R) 64
1411
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412
1413
// ------------------------------------------------------------------------
1414
// Operation on *lhs, rhs bound by critical section
1415
// OP - operator (it's supposed to contain an assignment)
1416
// LCK_ID - lock identifier
1417
// Note: don't check gtid as it should always be valid
1418
// 1, 2-byte - expect valid parameter, other - check before this macro
1419
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1420
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1421
\
1422
(*lhs) = (TYPE)((rhs)OP(*lhs)); \
1423
\
1424
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425
1426
#ifdef KMP_GOMP_COMPAT
1427
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1428
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1429
KMP_CHECK_GTID; \
1430
OP_CRITICAL_REV(TYPE, OP, 0); \
1431
return; \
1432
}
1433
1434
#else
1435
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436
#endif /* KMP_GOMP_COMPAT */
1437
1438
// Beginning of a definition (provides name, parameters, debug trace)
1439
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1440
// fixed)
1441
// OP_ID - operation identifier (add, sub, mul, ...)
1442
// TYPE - operands' type
1443
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1444
RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1445
TYPE *lhs, TYPE rhs) { \
1446
KMP_DEBUG_ASSERT(__kmp_init_serial); \
1447
KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448
1449
// ------------------------------------------------------------------------
1450
// Operation on *lhs, rhs using "compare_and_store" routine
1451
// TYPE - operands' type
1452
// BITS - size in bits, used to distinguish low level calls
1453
// OP - operator
1454
// Note: temp_val introduced in order to force the compiler to read
1455
// *lhs only once (w/o it the compiler reads *lhs twice)
1456
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1457
{ \
1458
TYPE KMP_ATOMIC_VOLATILE temp_val; \
1459
TYPE old_value, new_value; \
1460
temp_val = *lhs; \
1461
old_value = temp_val; \
1462
new_value = (TYPE)(rhs OP old_value); \
1463
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1464
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1465
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1466
KMP_DO_PAUSE; \
1467
\
1468
temp_val = *lhs; \
1469
old_value = temp_val; \
1470
new_value = (TYPE)(rhs OP old_value); \
1471
} \
1472
}
1473
1474
// -------------------------------------------------------------------------
1475
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1476
ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1477
OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1478
OP_CMPXCHG_REV(TYPE, BITS, OP) \
1479
}
1480
1481
// ------------------------------------------------------------------------
1482
// Entries definition for integer operands
1483
// TYPE_ID - operands type and size (fixed4, float4)
1484
// OP_ID - operation identifier (add, sub, mul, ...)
1485
// TYPE - operand type
1486
// BITS - size in bits, used to distinguish low level calls
1487
// OP - operator (used in critical section)
1488
// LCK_ID - lock identifier, used to possibly distinguish lock variable
1489
1490
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1491
// ------------------------------------------------------------------------
1492
// Routines for ATOMIC integer operands, other operators
1493
// ------------------------------------------------------------------------
1494
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1495
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507
1508
ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520
1521
ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522
KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532
KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533
1534
ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546
1547
ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548
KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551
1552
ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553
KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555
KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1557
1558
// ------------------------------------------------------------------------
1559
// Routines for Extended types: long double, _Quad, complex flavours (use
1560
// critical section)
1561
// TYPE_ID, OP_ID, TYPE - detailed above
1562
// OP - operator
1563
// LCK_ID - lock identifier, used to possibly distinguish lock variable
1564
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1565
ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1566
OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1567
OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1568
}
1569
1570
/* ------------------------------------------------------------------------- */
1571
// routines for long double type
1572
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573
1) // __kmpc_atomic_float10_sub_rev
1574
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575
1) // __kmpc_atomic_float10_div_rev
1576
#if KMP_HAVE_QUAD
1577
// routines for _Quad type
1578
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579
1) // __kmpc_atomic_float16_sub_rev
1580
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581
1) // __kmpc_atomic_float16_div_rev
1582
#if (KMP_ARCH_X86)
1583
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584
1) // __kmpc_atomic_float16_sub_a16_rev
1585
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586
1) // __kmpc_atomic_float16_div_a16_rev
1587
#endif // KMP_ARCH_X86
1588
#endif // KMP_HAVE_QUAD
1589
1590
// routines for complex types
1591
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592
1) // __kmpc_atomic_cmplx4_sub_rev
1593
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594
1) // __kmpc_atomic_cmplx4_div_rev
1595
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596
1) // __kmpc_atomic_cmplx8_sub_rev
1597
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598
1) // __kmpc_atomic_cmplx8_div_rev
1599
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600
1) // __kmpc_atomic_cmplx10_sub_rev
1601
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602
1) // __kmpc_atomic_cmplx10_div_rev
1603
#if KMP_HAVE_QUAD
1604
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605
1) // __kmpc_atomic_cmplx16_sub_rev
1606
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607
1) // __kmpc_atomic_cmplx16_div_rev
1608
#if (KMP_ARCH_X86)
1609
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610
1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612
1) // __kmpc_atomic_cmplx16_div_a16_rev
1613
#endif // KMP_ARCH_X86
1614
#endif // KMP_HAVE_QUAD
1615
1616
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618
1619
/* ------------------------------------------------------------------------ */
1620
/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1621
/* Note: in order to reduce the total number of types combinations */
1622
/* it is supposed that compiler converts RHS to longest floating type,*/
1623
/* that is _Quad, before call to any of these routines */
1624
/* Conversion to _Quad will be done by the compiler during calculation, */
1625
/* conversion back to TYPE - before the assignment, like: */
1626
/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1627
/* Performance penalty expected because of SW emulation use */
1628
/* ------------------------------------------------------------------------ */
1629
1630
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1631
void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1632
ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1633
KMP_DEBUG_ASSERT(__kmp_init_serial); \
1634
KA_TRACE(100, \
1635
("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1636
gtid));
1637
1638
// -------------------------------------------------------------------------
1639
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1640
GOMP_FLAG) \
1641
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1642
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1643
OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1644
}
1645
1646
// -------------------------------------------------------------------------
1647
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648
// -------------------------------------------------------------------------
1649
// X86 or X86_64: no alignment problems ====================================
1650
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651
LCK_ID, MASK, GOMP_FLAG) \
1652
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654
OP_CMPXCHG(TYPE, BITS, OP) \
1655
}
1656
// -------------------------------------------------------------------------
1657
#else
1658
// ------------------------------------------------------------------------
1659
// Code for other architectures that don't handle unaligned accesses.
1660
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1661
LCK_ID, MASK, GOMP_FLAG) \
1662
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1663
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1664
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1665
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1666
} else { \
1667
KMP_CHECK_GTID; \
1668
OP_UPDATE_CRITICAL(TYPE, OP, \
1669
LCK_ID) /* unaligned address - use critical */ \
1670
} \
1671
}
1672
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1673
1674
// -------------------------------------------------------------------------
1675
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676
// -------------------------------------------------------------------------
1677
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1678
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1679
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680
OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681
OP_CMPXCHG_REV(TYPE, BITS, OP) \
1682
}
1683
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1684
LCK_ID, GOMP_FLAG) \
1685
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1686
OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1687
OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1688
}
1689
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690
1691
// RHS=float8
1692
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693
KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697
KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701
0) // __kmpc_atomic_fixed4_mul_float8
1702
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703
0) // __kmpc_atomic_fixed4_div_float8
1704
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705
KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709
KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713
KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715
KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716
1717
// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718
// use them)
1719
#if KMP_HAVE_QUAD
1720
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721
KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729
KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736
1737
ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738
KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746
KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753
1754
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755
0) // __kmpc_atomic_fixed4_add_fp
1756
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757
0) // __kmpc_atomic_fixed4u_add_fp
1758
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759
0) // __kmpc_atomic_fixed4_sub_fp
1760
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761
0) // __kmpc_atomic_fixed4u_sub_fp
1762
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763
0) // __kmpc_atomic_fixed4_mul_fp
1764
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765
0) // __kmpc_atomic_fixed4u_mul_fp
1766
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767
0) // __kmpc_atomic_fixed4_div_fp
1768
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769
0) // __kmpc_atomic_fixed4u_div_fp
1770
1771
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772
KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780
KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787
1788
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789
KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793
KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795
KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796
1797
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798
KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800
KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802
KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804
KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805
1806
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807
ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808
1) // __kmpc_atomic_float10_add_fp
1809
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810
1) // __kmpc_atomic_float10_sub_fp
1811
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812
1) // __kmpc_atomic_float10_mul_fp
1813
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814
1) // __kmpc_atomic_float10_div_fp
1815
1816
// Reverse operations
1817
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825
1826
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834
1835
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836
0) // __kmpc_atomic_fixed4_sub_rev_fp
1837
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838
0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840
0) // __kmpc_atomic_fixed4_div_rev_fp
1841
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842
0) // __kmpc_atomic_fixed4u_div_rev_fp
1843
1844
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852
1853
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856
KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857
1858
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859
KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861
KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862
1863
ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864
1) // __kmpc_atomic_float10_sub_rev_fp
1865
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866
1) // __kmpc_atomic_float10_div_rev_fp
1867
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868
1869
#endif // KMP_HAVE_QUAD
1870
1871
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872
// ------------------------------------------------------------------------
1873
// X86 or X86_64: no alignment problems ====================================
1874
#if USE_CMPXCHG_FIX
1875
// workaround for C78287 (complex(kind=4) data type)
1876
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1877
LCK_ID, MASK, GOMP_FLAG) \
1878
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1879
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1880
OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1881
}
1882
// end of the second part of the workaround for C78287
1883
#else
1884
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885
LCK_ID, MASK, GOMP_FLAG) \
1886
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888
OP_CMPXCHG(TYPE, BITS, OP) \
1889
}
1890
#endif // USE_CMPXCHG_FIX
1891
#else
1892
// ------------------------------------------------------------------------
1893
// Code for other architectures that don't handle unaligned accesses.
1894
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1895
LCK_ID, MASK, GOMP_FLAG) \
1896
ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1897
OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1898
if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1899
OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1900
} else { \
1901
KMP_CHECK_GTID; \
1902
OP_UPDATE_CRITICAL(TYPE, OP, \
1903
LCK_ID) /* unaligned address - use critical */ \
1904
} \
1905
}
1906
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907
1908
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909
7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911
7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913
7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915
7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1916
1917
// READ, WRITE, CAPTURE
1918
1919
// ------------------------------------------------------------------------
1920
// Atomic READ routines
1921
1922
// ------------------------------------------------------------------------
1923
// Beginning of a definition (provides name, parameters, gebug trace)
1924
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1925
// fixed)
1926
// OP_ID - operation identifier (add, sub, mul, ...)
1927
// TYPE - operands' type
1928
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1929
RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1930
TYPE *loc) { \
1931
KMP_DEBUG_ASSERT(__kmp_init_serial); \
1932
KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1933
1934
// ------------------------------------------------------------------------
1935
// Operation on *lhs, rhs using "compare_and_store_ret" routine
1936
// TYPE - operands' type
1937
// BITS - size in bits, used to distinguish low level calls
1938
// OP - operator
1939
// Note: temp_val introduced in order to force the compiler to read
1940
// *lhs only once (w/o it the compiler reads *lhs twice)
1941
// TODO: check if it is still necessary
1942
// Return old value regardless of the result of "compare & swap# operation
1943
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1944
{ \
1945
TYPE KMP_ATOMIC_VOLATILE temp_val; \
1946
union f_i_union { \
1947
TYPE f_val; \
1948
kmp_int##BITS i_val; \
1949
}; \
1950
union f_i_union old_value; \
1951
temp_val = *loc; \
1952
old_value.f_val = temp_val; \
1953
old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1954
(kmp_int##BITS *)loc, \
1955
*VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1956
*VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1957
new_value = old_value.f_val; \
1958
return new_value; \
1959
}
1960
1961
// -------------------------------------------------------------------------
1962
// Operation on *lhs, rhs bound by critical section
1963
// OP - operator (it's supposed to contain an assignment)
1964
// LCK_ID - lock identifier
1965
// Note: don't check gtid as it should always be valid
1966
// 1, 2-byte - expect valid parameter, other - check before this macro
1967
#define OP_CRITICAL_READ(OP, LCK_ID) \
1968
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1969
\
1970
new_value = (*loc); \
1971
\
1972
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1973
1974
// -------------------------------------------------------------------------
1975
#ifdef KMP_GOMP_COMPAT
1976
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1977
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1978
KMP_CHECK_GTID; \
1979
OP_CRITICAL_READ(OP, 0); \
1980
return new_value; \
1981
}
1982
#else
1983
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1984
#endif /* KMP_GOMP_COMPAT */
1985
1986
// -------------------------------------------------------------------------
1987
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1988
ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1989
TYPE new_value; \
1990
OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1991
new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1992
return new_value; \
1993
}
1994
// -------------------------------------------------------------------------
1995
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1996
ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1997
TYPE new_value; \
1998
OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1999
OP_CMPXCHG_READ(TYPE, BITS, OP) \
2000
}
2001
// ------------------------------------------------------------------------
2002
// Routines for Extended types: long double, _Quad, complex flavours (use
2003
// critical section)
2004
// TYPE_ID, OP_ID, TYPE - detailed above
2005
// OP - operator
2006
// LCK_ID - lock identifier, used to possibly distinguish lock variable
2007
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2008
ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2009
TYPE new_value; \
2010
OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2011
OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2012
return new_value; \
2013
}
2014
2015
// ------------------------------------------------------------------------
2016
// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
2017
// value doesn't work.
2018
// Let's return the read value through the additional parameter.
2019
#if (KMP_OS_WINDOWS)
2020
2021
#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2022
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2023
\
2024
(*out) = (*loc); \
2025
\
2026
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2027
// ------------------------------------------------------------------------
2028
#ifdef KMP_GOMP_COMPAT
2029
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2030
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2031
KMP_CHECK_GTID; \
2032
OP_CRITICAL_READ_WRK(OP, 0); \
2033
}
2034
#else
2035
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2036
#endif /* KMP_GOMP_COMPAT */
2037
// ------------------------------------------------------------------------
2038
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2039
void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2040
TYPE *loc) { \
2041
KMP_DEBUG_ASSERT(__kmp_init_serial); \
2042
KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2043
2044
// ------------------------------------------------------------------------
2045
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2046
ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2047
OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2048
OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2049
}
2050
2051
#endif // KMP_OS_WINDOWS
2052
2053
// ------------------------------------------------------------------------
2054
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2055
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2056
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2057
KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2058
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2059
KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2060
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2061
KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2062
2063
// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2064
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2065
KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2066
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2067
KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2068
2069
ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2070
1) // __kmpc_atomic_float10_rd
2071
#if KMP_HAVE_QUAD
2072
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2073
1) // __kmpc_atomic_float16_rd
2074
#endif // KMP_HAVE_QUAD
2075
2076
// Fix for CQ220361 on Windows* OS
2077
#if (KMP_OS_WINDOWS)
2078
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2079
1) // __kmpc_atomic_cmplx4_rd
2080
#else
2081
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2082
1) // __kmpc_atomic_cmplx4_rd
2083
#endif // (KMP_OS_WINDOWS)
2084
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2085
1) // __kmpc_atomic_cmplx8_rd
2086
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2087
1) // __kmpc_atomic_cmplx10_rd
2088
#if KMP_HAVE_QUAD
2089
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2090
1) // __kmpc_atomic_cmplx16_rd
2091
#if (KMP_ARCH_X86)
2092
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2093
1) // __kmpc_atomic_float16_a16_rd
2094
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2095
1) // __kmpc_atomic_cmplx16_a16_rd
2096
#endif // (KMP_ARCH_X86)
2097
#endif // KMP_HAVE_QUAD
2098
2099
// ------------------------------------------------------------------------
2100
// Atomic WRITE routines
2101
2102
#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2103
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2104
OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2105
KMP_XCHG_FIXED##BITS(lhs, rhs); \
2106
}
2107
// ------------------------------------------------------------------------
2108
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2109
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2110
OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2111
KMP_XCHG_REAL##BITS(lhs, rhs); \
2112
}
2113
2114
// ------------------------------------------------------------------------
2115
// Operation on *lhs, rhs using "compare_and_store" routine
2116
// TYPE - operands' type
2117
// BITS - size in bits, used to distinguish low level calls
2118
// OP - operator
2119
// Note: temp_val introduced in order to force the compiler to read
2120
// *lhs only once (w/o it the compiler reads *lhs twice)
2121
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2122
{ \
2123
TYPE KMP_ATOMIC_VOLATILE temp_val; \
2124
TYPE old_value, new_value; \
2125
temp_val = *lhs; \
2126
old_value = temp_val; \
2127
new_value = rhs; \
2128
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2129
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2130
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2131
temp_val = *lhs; \
2132
old_value = temp_val; \
2133
new_value = rhs; \
2134
} \
2135
}
2136
2137
// -------------------------------------------------------------------------
2138
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2139
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2140
OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2141
OP_CMPXCHG_WR(TYPE, BITS, OP) \
2142
}
2143
2144
// ------------------------------------------------------------------------
2145
// Routines for Extended types: long double, _Quad, complex flavours (use
2146
// critical section)
2147
// TYPE_ID, OP_ID, TYPE - detailed above
2148
// OP - operator
2149
// LCK_ID - lock identifier, used to possibly distinguish lock variable
2150
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2151
ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2152
OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2153
OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2154
}
2155
// -------------------------------------------------------------------------
2156
2157
ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2158
KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2159
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2160
KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2161
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2162
KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2163
#if (KMP_ARCH_X86)
2164
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2165
KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2166
#else
2167
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2168
KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2169
#endif // (KMP_ARCH_X86)
2170
2171
ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2172
KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2173
#if (KMP_ARCH_X86)
2174
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2175
KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2176
#else
2177
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2178
KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2179
#endif // (KMP_ARCH_X86)
2180
2181
ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2182
1) // __kmpc_atomic_float10_wr
2183
#if KMP_HAVE_QUAD
2184
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2185
1) // __kmpc_atomic_float16_wr
2186
#endif // KMP_HAVE_QUAD
2187
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2188
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2189
1) // __kmpc_atomic_cmplx8_wr
2190
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2191
1) // __kmpc_atomic_cmplx10_wr
2192
#if KMP_HAVE_QUAD
2193
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2194
1) // __kmpc_atomic_cmplx16_wr
2195
#if (KMP_ARCH_X86)
2196
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2197
1) // __kmpc_atomic_float16_a16_wr
2198
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2199
1) // __kmpc_atomic_cmplx16_a16_wr
2200
#endif // (KMP_ARCH_X86)
2201
#endif // KMP_HAVE_QUAD
2202
2203
// ------------------------------------------------------------------------
2204
// Atomic CAPTURE routines
2205
2206
// Beginning of a definition (provides name, parameters, gebug trace)
2207
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2208
// fixed)
2209
// OP_ID - operation identifier (add, sub, mul, ...)
2210
// TYPE - operands' type
2211
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2212
RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2213
TYPE *lhs, TYPE rhs, int flag) { \
2214
KMP_DEBUG_ASSERT(__kmp_init_serial); \
2215
KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2216
2217
// -------------------------------------------------------------------------
2218
// Operation on *lhs, rhs bound by critical section
2219
// OP - operator (it's supposed to contain an assignment)
2220
// LCK_ID - lock identifier
2221
// Note: don't check gtid as it should always be valid
2222
// 1, 2-byte - expect valid parameter, other - check before this macro
2223
#define OP_CRITICAL_CPT(OP, LCK_ID) \
2224
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2225
\
2226
if (flag) { \
2227
(*lhs) OP rhs; \
2228
new_value = (*lhs); \
2229
} else { \
2230
new_value = (*lhs); \
2231
(*lhs) OP rhs; \
2232
} \
2233
\
2234
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2235
return new_value;
2236
2237
#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2238
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2239
\
2240
if (flag) { \
2241
(*lhs) = (TYPE)((*lhs)OP rhs); \
2242
new_value = (*lhs); \
2243
} else { \
2244
new_value = (*lhs); \
2245
(*lhs) = (TYPE)((*lhs)OP rhs); \
2246
} \
2247
\
2248
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2249
return new_value;
2250
2251
// ------------------------------------------------------------------------
2252
#ifdef KMP_GOMP_COMPAT
2253
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2254
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2255
KMP_CHECK_GTID; \
2256
OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2257
}
2258
#else
2259
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2260
#endif /* KMP_GOMP_COMPAT */
2261
2262
// ------------------------------------------------------------------------
2263
// Operation on *lhs, rhs using "compare_and_store" routine
2264
// TYPE - operands' type
2265
// BITS - size in bits, used to distinguish low level calls
2266
// OP - operator
2267
// Note: temp_val introduced in order to force the compiler to read
2268
// *lhs only once (w/o it the compiler reads *lhs twice)
2269
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2270
{ \
2271
TYPE KMP_ATOMIC_VOLATILE temp_val; \
2272
TYPE old_value, new_value; \
2273
temp_val = *lhs; \
2274
old_value = temp_val; \
2275
new_value = (TYPE)(old_value OP rhs); \
2276
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2277
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2278
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2279
temp_val = *lhs; \
2280
old_value = temp_val; \
2281
new_value = (TYPE)(old_value OP rhs); \
2282
} \
2283
if (flag) { \
2284
return new_value; \
2285
} else \
2286
return old_value; \
2287
}
2288
2289
// -------------------------------------------------------------------------
2290
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291
ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292
TYPE new_value; \
2293
(void)new_value; \
2294
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2296
}
2297
2298
// -------------------------------------------------------------------------
2299
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2300
ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2301
TYPE old_value, new_value; \
2302
(void)new_value; \
2303
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2304
/* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2305
old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2306
if (flag) { \
2307
return old_value OP rhs; \
2308
} else \
2309
return old_value; \
2310
}
2311
// -------------------------------------------------------------------------
2312
2313
ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2314
0) // __kmpc_atomic_fixed4_add_cpt
2315
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2316
0) // __kmpc_atomic_fixed4_sub_cpt
2317
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2318
KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2319
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2320
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2321
2322
ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2323
KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2324
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2325
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2326
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2327
KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2328
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2329
KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2330
2331
// ------------------------------------------------------------------------
2332
// Entries definition for integer operands
2333
// TYPE_ID - operands type and size (fixed4, float4)
2334
// OP_ID - operation identifier (add, sub, mul, ...)
2335
// TYPE - operand type
2336
// BITS - size in bits, used to distinguish low level calls
2337
// OP - operator (used in critical section)
2338
// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2339
// ------------------------------------------------------------------------
2340
// Routines for ATOMIC integer operands, other operators
2341
// ------------------------------------------------------------------------
2342
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2343
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2344
KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2345
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2346
0) // __kmpc_atomic_fixed1_andb_cpt
2347
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2348
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2349
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2350
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2351
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2352
KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2353
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2354
0) // __kmpc_atomic_fixed1_orb_cpt
2355
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2356
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2357
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2358
KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2359
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2360
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2361
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2362
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2363
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2364
0) // __kmpc_atomic_fixed1_xor_cpt
2365
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2366
KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2367
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2368
0) // __kmpc_atomic_fixed2_andb_cpt
2369
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2370
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2371
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2372
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2373
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2374
KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2375
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2376
0) // __kmpc_atomic_fixed2_orb_cpt
2377
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2378
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2379
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2380
KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2381
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2382
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2383
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2384
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2385
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2386
0) // __kmpc_atomic_fixed2_xor_cpt
2387
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2388
0) // __kmpc_atomic_fixed4_andb_cpt
2389
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2390
KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2391
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2392
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2393
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2394
KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2395
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2396
0) // __kmpc_atomic_fixed4_orb_cpt
2397
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2398
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2399
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2400
KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2401
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2402
KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2403
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2404
0) // __kmpc_atomic_fixed4_xor_cpt
2405
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2406
KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2407
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2408
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2409
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2410
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2411
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2412
KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2413
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2414
KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2415
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2416
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2417
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2418
KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2419
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2420
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2421
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2422
KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2423
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2424
KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2425
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2426
KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2427
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2428
KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2429
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2430
KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2431
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2432
2433
// CAPTURE routines for mixed types RHS=float16
2434
#if KMP_HAVE_QUAD
2435
2436
// Beginning of a definition (provides name, parameters, gebug trace)
2437
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2438
// fixed)
2439
// OP_ID - operation identifier (add, sub, mul, ...)
2440
// TYPE - operands' type
2441
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2442
TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2443
ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2444
KMP_DEBUG_ASSERT(__kmp_init_serial); \
2445
KA_TRACE(100, \
2446
("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2447
gtid));
2448
2449
// -------------------------------------------------------------------------
2450
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2451
RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2452
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453
TYPE new_value; \
2454
(void)new_value; \
2455
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2456
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2457
}
2458
2459
// -------------------------------------------------------------------------
2460
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2461
LCK_ID, GOMP_FLAG) \
2462
ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2463
TYPE new_value; \
2464
(void)new_value; \
2465
OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2466
OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2467
}
2468
2469
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2470
KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2471
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2472
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2473
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2474
KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2475
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2476
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2477
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2478
KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2479
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2480
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2481
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2482
KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2483
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2484
KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2485
2486
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2487
KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2488
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2489
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2490
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2491
KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2492
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2493
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2494
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2495
KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2496
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2497
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2498
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2499
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2500
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2501
KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2502
2503
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2504
0) // __kmpc_atomic_fixed4_add_cpt_fp
2505
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2506
0) // __kmpc_atomic_fixed4u_add_cpt_fp
2507
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2508
0) // __kmpc_atomic_fixed4_sub_cpt_fp
2509
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2510
0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2511
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2512
0) // __kmpc_atomic_fixed4_mul_cpt_fp
2513
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2514
0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2515
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2516
0) // __kmpc_atomic_fixed4_div_cpt_fp
2517
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2518
0) // __kmpc_atomic_fixed4u_div_cpt_fp
2519
2520
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2521
KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2522
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2523
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2524
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2525
KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2526
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2527
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2528
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2529
KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2530
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2531
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2532
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2533
KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2534
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2535
KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2536
2537
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2538
KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2539
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2540
KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2541
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2542
KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2543
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2544
KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2545
2546
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2547
KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2548
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2549
KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2550
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2551
KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2552
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2553
KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2554
2555
ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2556
1) // __kmpc_atomic_float10_add_cpt_fp
2557
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2558
1) // __kmpc_atomic_float10_sub_cpt_fp
2559
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2560
1) // __kmpc_atomic_float10_mul_cpt_fp
2561
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2562
1) // __kmpc_atomic_float10_div_cpt_fp
2563
2564
#endif // KMP_HAVE_QUAD
2565
2566
// ------------------------------------------------------------------------
2567
// Routines for C/C++ Reduction operators && and ||
2568
2569
// -------------------------------------------------------------------------
2570
// Operation on *lhs, rhs bound by critical section
2571
// OP - operator (it's supposed to contain an assignment)
2572
// LCK_ID - lock identifier
2573
// Note: don't check gtid as it should always be valid
2574
// 1, 2-byte - expect valid parameter, other - check before this macro
2575
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2576
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2577
\
2578
if (flag) { \
2579
new_value OP rhs; \
2580
(*lhs) = new_value; \
2581
} else { \
2582
new_value = (*lhs); \
2583
(*lhs) OP rhs; \
2584
} \
2585
\
2586
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2587
2588
// ------------------------------------------------------------------------
2589
#ifdef KMP_GOMP_COMPAT
2590
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2591
if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2592
KMP_CHECK_GTID; \
2593
OP_CRITICAL_L_CPT(OP, 0); \
2594
return new_value; \
2595
}
2596
#else
2597
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2598
#endif /* KMP_GOMP_COMPAT */
2599
2600
// ------------------------------------------------------------------------
2601
// Need separate macros for &&, || because there is no combined assignment
2602
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2603
ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2604
TYPE new_value; \
2605
(void)new_value; \
2606
OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2607
OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2608
}
2609
2610
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2611
KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2612
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2613
KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2614
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2615
KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2616
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2617
KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2618
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2619
0) // __kmpc_atomic_fixed4_andl_cpt
2620
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2621
0) // __kmpc_atomic_fixed4_orl_cpt
2622
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2623
KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2624
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2625
KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2626
2627
// -------------------------------------------------------------------------
// Routines for Fortran operators that have no direct C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  (void)new_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }
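// Illustrative sketch (not compiled): for the compare-and-store flavour above,
// a generated routine such as __kmpc_atomic_fixed4_min_cpt is roughly
// equivalent to the following hand-written form, where OP is ">"
// (the function name below is hypothetical):
//
//   kmp_int32 example_fixed4_min_cpt(kmp_int32 *lhs, kmp_int32 rhs, int flag) {
//     kmp_int32 old_value;
//     if (*lhs > rhs) { // only act while rhs is still the smaller value
//       old_value = *lhs;
//       while (old_value > rhs &&
//              !KMP_COMPARE_AND_STORE_ACQ32(lhs, old_value, rhs)) {
//         old_value = *lhs; // lost the race; re-read and re-check
//       }
//       return flag ? rhs : old_value;
//     }
//     return *lhs; // no action needed; capture the current value
//   }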

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
    0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
    0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
    KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
    KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
    KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
    KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
    1) // __kmpc_atomic_float10_max_cpt
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
    1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
    1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
    1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
    1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
    1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4
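// Illustrative note (not compiled): because of the workaround above, the
// cmplx4 capture entry points return their result through the extra "out"
// parameter instead of a return value. A hypothetical caller would look like:
//
//   kmp_cmplx32 captured;
//   __kmpc_atomic_cmplx4_add_cpt(loc, gtid, &x, rhs, &captured, /*flag=*/1);
//   // "captured" now holds x after the atomic x += rhs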

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
    1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
    1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
    1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
    1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }
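// Illustrative sketch (not compiled): a routine generated from
// ATOMIC_CMPXCHG_CPT_REV, e.g. __kmpc_atomic_float8_div_cpt_rev, atomically
// performs the reversed operation x = rhs / x and captures one side of it,
// roughly as in this hand-written form (the function name is hypothetical):
//
//   kmp_real64 example_float8_div_cpt_rev(kmp_real64 *lhs, kmp_real64 rhs,
//                                         int flag) {
//     kmp_real64 old_value, new_value;
//     do {
//       old_value = *lhs;
//       new_value = rhs / old_value; // reversed: rhs OP old, not old OP rhs
//     } while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                           *(kmp_int64 *)&old_value,
//                                           *(kmp_int64 *)&new_value));
//     return flag ? new_value : old_value;
//   }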

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
    1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
    1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
    1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
    1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
    1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
    1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
    1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
    1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
    1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
    1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
    1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
    1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
    1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
    1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
    1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
    1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
    1,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
    1,
    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
    3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
    4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
    3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
    4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
    7,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
    8i, 7,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
    7,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
    8i, 7,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
    4r, 3,
    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
    4r, 3,
    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
    8r, 7,
    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
    8r, 7,
    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
    10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
    10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  old_value = (*lhs); \
  (*lhs) = rhs; \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  (void)old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
    KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)
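// Illustrative note (not compiled): every *_swp entry point implements the
// capture-write form {v = x; x = rhs;} and returns v. A hypothetical caller
// of the fixed4 flavour:
//
//   kmp_int32 prev = __kmpc_atomic_fixed4_swp(loc, gtid, &x, new_val);
//   // prev holds the value x had immediately before it was replaced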

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
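// Illustrative note (not compiled): the generic entry points take the update
// as a callback, so the runtime never needs to know the operation itself.
// A hypothetical compiler-generated combine function and call might look like:
//
//   static void example_add_char(void *out, void *a, void *b) {
//     *(char *)out = *(char *)a + *(char *)b; // out = a + b
//   }
//   ...
//   __kmpc_atomic_1(loc, gtid, &x, &rhs, example_add_char); // x = x + rhs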

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code.
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
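// Illustrative note (not compiled): a compiler that cannot lower a particular
// atomic construct inline can bracket the update with these entry points,
// e.g. for a hypothetical user-defined type with no native atomic support:
//
//   __kmpc_atomic_start();
//   x = combine(x, rhs); // protected by the global atomic lock
//   __kmpc_atomic_end();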

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// OpenMP 5.1 compare and swap

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@return Result of comparison

Implements Compare And Swap atomic operation.

Sample code:
#pragma omp atomic compare update capture
{ r = x == e; if(r) { x = d; } }
*/
bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
}
bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
}
bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
                              kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
}
bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
                              kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
}

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@return Old value of x

Implements Compare And Swap atomic operation.

Sample code:
#pragma omp atomic compare update capture
{ v = x; if (x == e) { x = d; } }
*/
char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
}
short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
}
kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
}
kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
}

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Result of comparison

Implements Compare And Swap + Capture atomic operation.

v gets the old value of x if the comparison failed, untouched otherwise.
Sample code:
#pragma omp atomic compare update capture
{ r = x == e; if(r) { x = d; } else { v = x; } }
*/
bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                  char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Old value of x

Implements Compare And Swap + Capture atomic operation.

v gets the new value of x.
Sample code:
#pragma omp atomic compare update capture
{ if (x == e) { x = d; }; v = x; }
*/
char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                 char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                      kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                      kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
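// Illustrative note (not compiled): a compiler lowering the sample form above,
//   #pragma omp atomic compare capture
//   { if (x == e) { x = d; } v = x; }
// for a 32-bit integer could, for example, emit:
//
//   kmp_int32 v;
//   (void)__kmpc_atomic_val_4_cas_cpt(loc, gtid, &x, e, d, &v);
//   // v now holds d if the swap happened, or the unchanged value of x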

// End OpenMP 5.1 compare + capture
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

/*!
@}
*/

// end of file