GitHub Repository: allendowney/cpython
Path: blob/main/Include/internal/pycore_atomic.h
#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
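
/* Illustrative correspondence (a sketch; `obj` and `py_obj` are hypothetical
 * variables, not defined by this header): the C11 call
 *
 *     atomic_store_explicit(&obj, 1, memory_order_release);
 *
 * is spelled here, via the wrappers defined below, as
 *
 *     _Py_atomic_store_explicit(&py_obj, 1, _Py_memory_order_release);
 *
 * where py_obj is a _Py_atomic_int or _Py_atomic_address. */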

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))
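
/* The assertions above encode the C11 rules: an atomic store permits only
 * relaxed, release, or seq_cst ordering, and an atomic load permits only
 * relaxed, consume, acquire, or seq_cst; __atomic_store_n/__atomic_load_n
 * are undefined for the remaining orderings. */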

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}
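
/* The empty asm with a "memory" clobber is a compiler-only barrier: no
 * instruction is emitted, but the compiler may not reorder memory accesses
 * across it. "mfence" additionally orders the CPU's own loads and stores,
 * making _Py_atomic_thread_fence a full hardware barrier. */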

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })
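
/* Why xchg: a plain store on x86 is not sequentially consistent. With a
 * memory operand, xchg carries an implicit lock prefix and acts as a full
 * barrier, which is what makes the seq_cst store above correct. */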

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware, they fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific
    and hard-to-measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
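
/* On 32-bit builds the 64-bit store is stubbed out to ((void)0): the
 * sizeof-based dispatch in _Py_atomic_store_explicit below is resolved at
 * compile time, but both branches of the `if` must still parse, so the
 * branch that can never be taken has to expand to something harmless. */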

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
      break; \
  }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}
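
/* The CAS loop above implements an atomic load:
 * _InterlockedCompareExchange64(value, old, old) returns the current
 * contents of *value and, when they equal `old`, writes `old` back (a
 * no-op). Retrying until the compare succeeds yields an atomic 64-bit
 * read with full-barrier semantics. */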

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
      break; \
  }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned
   a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic. This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
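
/* A minimal usage sketch (illustrative only; `flag` is a hypothetical
 * variable, not something defined by this header):
 *
 *     static _Py_atomic_int flag = {0};
 *
 *     _Py_atomic_store(&flag, 1);              // seq_cst store
 *     int v = _Py_atomic_load(&flag);          // seq_cst load
 *     _Py_atomic_store_relaxed(&flag, 0);      // no ordering guarantees
 *     int w = _Py_atomic_load_relaxed(&flag);  // no ordering guarantees
 */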

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */