CoCalc -- atomic_asm

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/tile/lib/atomic_asm_32.S
10817 views
1
/*
2
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3
 *
4
 *   This program is free software; you can redistribute it and/or
5
 *   modify it under the terms of the GNU General Public License
6
 *   as published by the Free Software Foundation, version 2.
7
 *
8
 *   This program is distributed in the hope that it will be useful, but
9
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
10
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11
 *   NON INFRINGEMENT.  See the GNU General Public License for
12
 *   more details.
13
 *
14
 * Support routines for atomic operations.  Each function takes:
15
 *
16
 * r0: address to manipulate
17
 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
18
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
19
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
20
 *     (atomic64 ops) high word of value to write
21
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
22
 *
23
 * The 32-bit routines return a "struct __get_user" so that the futex code
24
 * has an opportunity to return -EFAULT to the user if needed.
25
 * The 64-bit routines just return a "long long" with the value,
26
 * since they are only used from kernel space and don't expect to fault.
27
 * Support for 16-bit ops is included in the framework but we don't provide
28
 * any (x86_64 has an atomic_inc_short(), so we might want to some day).
29
 *
30
 * Note that the caller is advised to issue a suitable L1 or L2
31
 * prefetch on the address being manipulated to avoid extra stalls.
32
 * In addition, the hot path is on two icache lines, and we start with
33
 * a jump to the second line to make sure they are both in cache so
34
 * that we never stall waiting on icache fill while holding the lock.
35
 * (This doesn't work out with most 64-bit ops, since they consume
36
 * too many bundles, so may take an extra i-cache stall.)
37
 *
38
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
39
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
40
 * the code, just page faults.
41
 *
42
 * If the load or store faults in a way that can be directly fixed in
43
 * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
44
 * directly, return to the instruction that faulted, and retry it.
45
 *
46
 * If the load or store faults in a way that potentially requires us
47
 * to release the atomic lock, then retry (e.g. a migrating PTE), we
48
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
49
 * that on return we will reacquire the lock and restart the op.  We
50
 * are somewhat overloading the exception_table_entry notion by doing
51
 * this, since those entries are not normally used for migrating PTEs.
52
 *
53
 * If the main page fault handler discovers a bad address, it will see
54
 * the PC pointing to the "tns" instruction (due to the earlier
55
 * exception_table_entry processing in do_page_fault_ics), and
56
 * re-reset the PC to the fault handler, atomic_bad_address(), which
57
 * effectively takes over from the atomic op and can either return a
58
 * bad "struct __get_user" (for user addresses) or can just panic (for
59
 * bad kernel addresses).
60
 *
61
 * Note that if the value we would store is the same as what we
62
 * loaded, we bypass the store.  Other platforms with true atomics can
63
 * make the guarantee that a non-atomic __clear_bit(), for example,
64
 * can safely race with an atomic test_and_set_bit(); this example is
65
 * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
66
 * that on Tile since the "atomic" op is really just a
67
 * read/modify/write, and can race with the non-atomic
68
 * read/modify/write.  However, if we can short-circuit the write when
69
 * it is not needed, in the atomic case, we avoid the race.
70
 */
71

72
#include <linux/linkage.h>
73
#include <asm/atomic.h>
74
#include <asm/page.h>
75
#include <asm/processor.h>
76

77
	/*
	 * Everything that follows is emitted into the .text.atomic section
	 * (flags "ax": allocatable, executable).
	 */
	.section .text.atomic,"ax"
78
/*
 * Marks the start of the atomic support routines; the matching
 * __end_atomic_asm_code marker is at the bottom of this file.
 * NOTE(review): presumably the fault handler compares the faulting PC
 * against this [start, end) range -- confirm against do_page_fault_ics().
 */
ENTRY(__start_atomic_asm_code)
79

80
	/*
	 * atomic_op name, bitwidth, body
	 *
	 * Emits one routine called __atomic<name>.
	 *   name:     suffix appended to "__atomic" to form the symbol.
	 *   bitwidth: 16, 32 or 64.  Selects halfword vs. word accesses;
	 *             64 additionally loads/stores a second word at r0 + 4.
	 *   body:     instructions that compute the value to store into r24
	 *             (and r25 for 64-bit) from the loaded value in r22
	 *             (and r23).  A body may branch to local label 3 to
	 *             return without storing.
	 *
	 * Local labels: 1 = load, 2 = store, 3 = unlock and return,
	 * 4 = continuation of the entry bundle (lock setup/acquire),
	 * 5 and 6 = backoff loop (SMP only).
	 *
	 * On return r0 holds the value that was loaded; r1 holds the loaded
	 * high word for 64-bit routines and zero otherwise.
	 */
	.macro  atomic_op, name, bitwidth, body
81
	.align  64
82
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
83
	{
84
	 /* r24 = 1 is the value written to INTERRUPT_CRITICAL_SECTION at 4: */
	 movei  r24, 1
85
	 j      4f		/* branch to second cache line */
86
	}
87
	/* 1: load the current value into r22; r28 = address of the next word */
1:	{
88
	 .ifc \bitwidth,16
89
	 lh     r22, r0
90
	 .else
91
	 lw     r22, r0
92
	 addi   r28, r0, 4
93
	 .endif
94
	}
95
	/* 64-bit only: load the second word (at r0 + 4) into r23 */
	.ifc \bitwidth,64
96
	lw      r23, r28
97
	.endif
98
	\body /* set r24, and r25 if 64-bit */
99
	/* r26 = (loaded low == new low), r27 = (loaded high == new high) */
	{
100
	 seq    r26, r22, r24
101
	 seq    r27, r23, r25
102
	}
103
	/* 64-bit only: if the high words differ, go straight to the store */
	.ifc \bitwidth,64
104
	bbnst   r27, 2f
105
	.endif
106
	bbs     r26, 3f		/* skip write-back if it's the same value */
107
	/* 2: store the new value from r24 to the address in r0 */
2:	{
108
	 .ifc \bitwidth,16
109
	 sh     r0, r24
110
	 .else
111
	 sw     r0, r24
112
	 .endif
113
	}
114
	/* 64-bit only: store the new high word from r25 to r0 + 4 */
	.ifc \bitwidth,64
115
	sw      r28, r25
116
	.endif
117
	/* memory fence after the store(s), before the lock word is cleared */
	mf
118
	/*
	 * 3: set up the return value (r0 = loaded value, r1 = loaded high
	 * word or zero) and store zero to the lock word, in one bundle.
	 */
3:	{
119
	 move   r0, r22
120
	 .ifc \bitwidth,64
121
	 move   r1, r23
122
	 .else
123
	 move   r1, zero
124
	 .endif
125
	 sw     ATOMIC_LOCK_REG_NAME, zero
126
	}
127
	/* clear the interrupt-critical-section SPR again, then return */
	mtspr   INTERRUPT_CRITICAL_SECTION, zero
128
	jrp     lr
129
	/*
	 * 4: reached from the entry bundle.  Copy the lock pointer passed in
	 * r1 into ATOMIC_LOCK_REG_NAME and write r24 (= 1) to the
	 * INTERRUPT_CRITICAL_SECTION SPR.
	 */
4:	{
130
	 move   ATOMIC_LOCK_REG_NAME, r1
131
	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
132
	}
133
#ifndef CONFIG_SMP
134
	j       1b		/* no atomic locks */
135
#else
136
	/*
	 * tns on the lock word; r21 receives its result and a zero result
	 * means the lock was acquired (see the bzt below).
	 */
	{
137
	 tns    r21, ATOMIC_LOCK_REG_NAME
138
	 moveli r23, 2048       /* maximum backoff time in cycles */
139
	}
140
	{
141
	 bzt    r21, 1b		/* branch if lock acquired */
142
	 moveli r25, 32         /* starting backoff time in cycles */
143
	}
144
	/*
	 * 5: lock was busy.  Clear INTERRUPT_CRITICAL_SECTION while waiting,
	 * then spin at 6: until r25 cycles have elapsed on CYCLE_LOW.
	 */
5:	mtspr   INTERRUPT_CRITICAL_SECTION, zero
145
	mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
146
6:	mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
147
	/* r22 = cycles elapsed since r26; loop while elapsed < r25 */
	sub     r22, r22, r26
148
	slt     r22, r22, r25
149
	bbst    r22, 6b
150
	{
151
	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
152
	 shli   r25, r25, 1     /* double the backoff; retry the tns */
153
	}
154
	{
155
	 tns    r21, ATOMIC_LOCK_REG_NAME
156
	 slt    r26, r23, r25   /* is the proposed backoff too big? */
157
	}
158
	{
159
	 bzt    r21, 1b		/* branch if lock acquired */
160
	 /* if r26 is nonzero (backoff exceeds the maximum), clamp r25 to r23 */
	 mvnz   r25, r26, r23
161
	}
162
	j       5b
163
#endif
164
	STD_ENDPROC(__atomic\name)
165
	/*
	 * Exception-table entries, emitted for 32-bit routines only.  The
	 * words pair the load (1b) and the store (2b) with the routine entry
	 * point, and the routine entry point with __atomic_bad_address.
	 * NOTE(review): presumably (faulting PC, fixup PC) pairs as described
	 * in the file header -- confirm against the __ex_table consumers.
	 */
	.ifc \bitwidth,32
166
	.pushsection __ex_table,"a"
167
	.word   1b, __atomic\name
168
	.word   2b, __atomic\name
169
	.word   __atomic\name, __atomic_bad_address
170
	.popsection
171
	.endif
172
	.endm
173

174
/*
 * 32-bit operations.
 *
 * Inside each body r22 holds the word just loaded from the address in
 * r0, and the body must leave the value to be stored in r24.  r2 and r3
 * are the caller's operands (see the register list in the file header).
 * A body may branch to local label 3 of atomic_op to unlock and return
 * the loaded value without storing anything.
 *
 * Bodies must leave r0-r3 untouched.  The exception-table entries that
 * atomic_op emits restart a routine from its entry point when the load
 * or the store faults, so a body can execute more than once with the
 * same argument registers.  r26 and r27 are free scratch: atomic_op
 * overwrites both immediately after the body.
 */

/* if (old == r2) store r3; else return without storing */
atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"

/* store r2 */
atomic_op _xchg, 32, "move r24, r2"

/* store old + r2 */
atomic_op _xchg_add, 32, "add r24, r22, r2"

/* if (old != r2) store old + r3; else return without storing */
atomic_op _xchg_add_unless, 32, \
	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"

/* store old | r2 */
atomic_op _or, 32, "or r24, r22, r2"

/*
 * store old & ~r2
 *
 * The inverted mask is built in scratch r26 rather than in r2 itself.
 * Inverting r2 in place would leave it clobbered if the following store
 * faulted; the retry from the routine entry would then invert it a
 * second time and compute old & r2 instead.
 */
atomic_op _andn, 32, "nor r26, r2, zero; and r24, r22, r26"

/* store old ^ r2 */
atomic_op _xor, 32, "xor r24, r22, r2"
182

183
/*
 * 64-bit operations.
 *
 * Inside each body r22/r23 hold the low/high words just loaded, and the
 * body must leave the low/high words to be stored in r24/r25.  r2/r3
 * and r4/r5 are the caller's operands (see the register list in the
 * file header).  A body may branch to local label 3 of atomic_op to
 * unlock and return the loaded value without storing anything.
 * r26 and r27 are scratch: atomic_op overwrites both after the body.
 */

/* if (old == r3:r2) store r5:r4; a mismatch in either word skips the store */
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"

/* store r3:r2 */
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"

/*
 * store old + r3:r2
 * The two words are added separately; slt_u detects the carry out of
 * the low word (sum < old low word) and it is then added to the high word.
 */
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
	slt_u r26, r24, r22; add r25, r25, r26"

/*
 * if (old != r3:r2) store old + r5:r4; else return without storing
 *
 * The add must be skipped only when BOTH words match the "unless" value.
 * The per-word "not equal" results are therefore OR-ed together and
 * tested once.  Branching on each word separately would skip the add
 * whenever either word happened to match; with an "unless" value of
 * zero, for example, no value below 2^32 would ever be updated.
 * Carry handling is the same as in 64_xchg_add.
 */
atomic_op 64_xchg_add_unless, 64, \
	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
	{ or r26, r26, r27; add r24, r22, r4 }; \
	{ bbns r26, 3f; add r25, r23, r5 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
193

194
	/*
	 * Stand-alone return placed after the last generated routine and
	 * before the end marker; it belongs to none of the routines above.
	 * NOTE(review): per the original remark it exists for the benefit of
	 * the backtracer -- the exact requirement is not visible here.
	 */
	jrp     lr              /* happy backtracer */
195

196
/*
 * Marks the end of the atomic support routines; pairs with
 * __start_atomic_asm_code near the top of the file.
 */
ENTRY(__end_atomic_asm_code)
197

198
Product

Resources

Company