Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/tile/lib/atomic_asm_32.S
10817 views
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * Support routines for atomic operations. Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide
 * any (x86_64 has an atomic_inc_short(), so we might want to some day).
 *
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so may take an extra i-cache stall.)
 *
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op. We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store. Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write. However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
 */

#include <linux/linkage.h>
73
#include <asm/atomic.h>
74
#include <asm/page.h>
75
#include <asm/processor.h>
76
77
/* Everything below is emitted into the allocatable, executable .text.atomic section. */
	.section .text.atomic,"ax"

/*
 * Start marker for the atomic assembly code.
 * NOTE(review): presumably paired with __end_atomic_asm_code so other code
 * can test whether a PC lies inside these routines - confirm against users.
 */
ENTRY(__start_atomic_asm_code)

/*
 * atomic_op name, bitwidth, body
 *
 * Emit the routine __atomic<name> into .text.atomic, aligned to 64 bytes.
 *
 *   name:     suffix appended to "__atomic" to form the symbol name
 *   bitwidth: 16, 32 or 64.  16 selects lh/sh, anything else lw/sw;
 *             64 additionally loads/stores the high word through r28
 *             (r0 + 4) and returns it in r1.
 *   body:     instruction string expanded between the load and the
 *             write-back.  It sees the loaded value in r22 (and r23 for
 *             64-bit) and must leave the value to store in r24 (and r25
 *             for 64-bit).  It may branch to 3f to skip the store.
 *
 * Inputs:   r0 = address to operate on, r1 = lock pointer (copied into
 *           ATOMIC_LOCK_REG_NAME); remaining argument registers are
 *           interpreted by "body".
 * Outputs:  r0 = value loaded from memory; r1 = loaded high word for
 *           64-bit ops, zero otherwise.
 * Scratch:  r21-r28 and ATOMIC_LOCK_REG_NAME are overwritten.
 *
 * Numeric labels:
 *   1: load the current value (entered once the lock is held)
 *   2: write back the new value
 *   3: set up the return registers, store zero to the lock word, clear
 *      INTERRUPT_CRITICAL_SECTION and return
 *   4: entry-time setup: set INTERRUPT_CRITICAL_SECTION and take the lock
 *   5: start of one backoff period (INTERRUPT_CRITICAL_SECTION cleared)
 *   6: spin until the backoff period has elapsed
 *
 * The instruction order is deliberate: the routine starts with a jump to
 * label 4 so that both icache lines of the hot path are touched before
 * the lock is taken.  Do not reorder bundles.
 *
 * For bitwidth 32 only, __ex_table entries send a fault at the load (1)
 * or the store (2) back to the routine's entry point, and a fault
 * reported at the entry point to __atomic_bad_address.
 */
	.macro	atomic_op, name, bitwidth, body
	.align	64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
	{
	 movei	r24, 1			/* r24 = 1: value written to ICS at label 4 */
	 j	4f			/* branch to second cache line */
	}
1:	{
	 .ifc \bitwidth,16
	 lh	r22, r0			/* r22 = current 16-bit value */
	 .else
	 lw	r22, r0			/* r22 = current (low) word */
	 addi	r28, r0, 4		/* r28 = address of the following word */
	 .endif
	}
	.ifc \bitwidth,64
	lw	r23, r28		/* r23 = current high word */
	.endif
	\body /* set r24, and r25 if 64-bit */
	{
	 seq	r26, r22, r24		/* r26 = (new low word == old low word) */
	 seq	r27, r23, r25		/* r27 = (new high word == old high word) */
	}
	.ifc \bitwidth,64
	bbnst	r27, 2f			/* high words differ: must write back */
	.endif
	bbs	r26, 3f			/* skip write-back if it's the same value */
2:	{
	 .ifc \bitwidth,16
	 sh	r0, r24
	 .else
	 sw	r0, r24
	 .endif
	}
	.ifc \bitwidth,64
	sw	r28, r25		/* store the new high word */
	.endif
	mf				/* memory fence before the lock word is cleared */
3:	{
	 move	r0, r22			/* return the value that was loaded */
	 .ifc \bitwidth,64
	 move	r1, r23
	 .else
	 move	r1, zero
	 .endif
	 sw	ATOMIC_LOCK_REG_NAME, zero	/* store zero to the lock word */
	}
	mtspr	INTERRUPT_CRITICAL_SECTION, zero
	jrp	lr
4:	{
	 move	ATOMIC_LOCK_REG_NAME, r1	/* keep the lock pointer; r1 becomes a return register */
	 mtspr	INTERRUPT_CRITICAL_SECTION, r24
	}
#ifndef CONFIG_SMP
	j	1b			/* no atomic locks */
#else
	{
	 tns	r21, ATOMIC_LOCK_REG_NAME
	 moveli	r23, 2048		/* maximum backoff time in cycles */
	}
	{
	 bzt	r21, 1b			/* branch if lock acquired */
	 moveli	r25, 32			/* starting backoff time in cycles */
	}
5:	mtspr	INTERRUPT_CRITICAL_SECTION, zero
	mfspr	r26, CYCLE_LOW		/* get start point for this backoff */
6:	mfspr	r22, CYCLE_LOW		/* test to see if we've backed off enough */
	sub	r22, r22, r26		/* r22 = cycles elapsed in this backoff */
	slt	r22, r22, r25
	bbst	r22, 6b			/* keep spinning while elapsed < r25 */
	{
	 mtspr	INTERRUPT_CRITICAL_SECTION, r24
	 shli	r25, r25, 1		/* double the backoff; retry the tns */
	}
	{
	 tns	r21, ATOMIC_LOCK_REG_NAME
	 slt	r26, r23, r25		/* is the proposed backoff too big? */
	}
	{
	 bzt	r21, 1b			/* branch if lock acquired */
	 mvnz	r25, r26, r23		/* if too big, clamp r25 to the maximum in r23 */
	}
	j	5b
#endif
	STD_ENDPROC(__atomic\name)
	.ifc \bitwidth,32
	.pushsection __ex_table,"a"
	.word	1b, __atomic\name
	.word	2b, __atomic\name
	.word	__atomic\name, __atomic_bad_address
	.popsection
	.endif
	.endm

/*
 * 32-bit operations.
 *
 * Each body sees the value loaded from memory in r22 and the caller's
 * operands in r2 (and r3), and leaves the value to store in r24.
 * Branching to 3f returns the loaded value without storing.
 *
 * Bodies must not modify the argument registers: the exception table
 * entry for the write-back restarts the routine from its entry point,
 * so a body can run more than once with the same arguments.
 */

/* Store r3 if the loaded value equals r2; otherwise leave memory alone. */
atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
/* Unconditionally store r2. */
atomic_op _xchg, 32, "move r24, r2"
/* Store loaded + r2. */
atomic_op _xchg_add, 32, "add r24, r22, r2"
/* Store loaded + r3 unless the loaded value equals r2. */
atomic_op _xchg_add_unless, 32, \
	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
/* Store loaded | r2. */
atomic_op _or, 32, "or r24, r22, r2"
/*
 * Store loaded & ~r2.  The complement is built in scratch r26 rather than
 * in r2 itself, so that re-running the body after a restarted write-back
 * still sees the caller's original mask in r2.
 */
atomic_op _andn, 32, "nor r26, r2, zero; and r24, r22, r26"
/* Store loaded ^ r2. */
atomic_op _xor, 32, "xor r24, r22, r2"

/*
 * 64-bit operations.
 *
 * The loaded value is r23:r22 (high:low) and the value to store is
 * r25:r24.  Operands arrive as register pairs r3:r2 and r5:r4.
 */

/* Store r5:r4 only if both loaded words equal r3:r2. */
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
/* Unconditionally store r3:r2. */
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
/* Store loaded + r3:r2; slt_u recovers the carry out of the low word. */
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
/*
 * Store loaded + r5:r4 unless the loaded value equals r3:r2.
 * The two 64-bit values are equal only when BOTH words match, so the
 * per-word "differs" flags are OR-ed together and the add is skipped
 * only when neither word differs.
 */
atomic_op 64_xchg_add_unless, 64, \
	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
	{ or r26, r26, r27; add r24, r22, r4 }; \
	{ bbns r26, 3f; add r25, r23, r5 }; \
	slt_u r26, r24, r22; add r25, r25, r26"

/*
 * Extra return placed after the last generated routine.
 * NOTE(review): the original note only says it keeps the backtracer happy;
 * the exact requirement is not visible in this file.
 */
	jrp	lr			/* happy backtracer */

/*
 * End marker for the atomic assembly code.
 * NOTE(review): presumably paired with __start_atomic_asm_code to bound the
 * address range of these routines - confirm against users.
 */
ENTRY(__end_atomic_asm_code)