Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/ia64/sn/kernel/bte_error.c
10819 views
1
/*
2
* This file is subject to the terms and conditions of the GNU General Public
3
* License. See the file "COPYING" in the main directory of this archive
4
* for more details.
5
*
6
* Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved.
7
*/
8
9
#include <linux/types.h>
10
#include <asm/sn/sn_sal.h>
11
#include "ioerror.h"
12
#include <asm/sn/addrs.h>
13
#include <asm/sn/shubio.h>
14
#include <asm/sn/geo.h>
15
#include "xtalk/xwidgetdev.h"
16
#include "xtalk/hubdev.h"
17
#include <asm/sn/bte.h>
18
#include <asm/param.h>
19
20
/*
 * BTE error handling is done in two parts.  The first part
 * (bte_crb_error_handler) captures any CRB-related errors.  Since there
 * can be multiple CRBs per interface and multiple interfaces active,
 * the second part (bte_error_handler) must first wait until all active
 * CRBs have completed.  Once all BTE-related CRBs are cleanly completed,
 * it resets the interfaces and gets them ready for new transfers to be
 * queued.
 */
29
30
/* Second-part BTE error handler; defined further down in this file. */
void bte_error_handler(unsigned long _nodepda);
31
32
/*
33
* Wait until all BTE related CRBs are completed
34
* and then reset the interfaces.
35
*/
36
int shub1_bte_error_handler(unsigned long _nodepda)
37
{
38
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
39
struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
40
nasid_t nasid;
41
int i;
42
int valid_crbs;
43
ii_imem_u_t imem; /* II IMEM Register */
44
ii_icrb0_d_u_t icrbd; /* II CRB Register D */
45
ii_ibcr_u_t ibcr;
46
ii_icmr_u_t icmr;
47
ii_ieclr_u_t ieclr;
48
49
BTE_PRINTK(("shub1_bte_error_handler(%p) - %d\n", err_nodepda,
50
smp_processor_id()));
51
52
if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
53
(err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
54
BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
55
smp_processor_id()));
56
return 1;
57
}
58
59
/* Determine information about our hub */
60
nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
61
62
/*
63
* A BTE transfer can use multiple CRBs. We need to make sure
64
* that all the BTE CRBs are complete (or timed out) before
65
* attempting to clean up the error. Resetting the BTE while
66
* there are still BTE CRBs active will hang the BTE.
67
* We should look at all the CRBs to see if they are allocated
68
* to the BTE and see if they are still active. When none
69
* are active, we can continue with the cleanup.
70
*
71
* We also want to make sure that the local NI port is up.
72
* When a router resets the NI port can go down, while it
73
* goes through the LLP handshake, but then comes back up.
74
*/
75
icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
76
if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
77
/*
78
* There are errors which still need to be cleaned up by
79
* hubiio_crb_error_handler
80
*/
81
mod_timer(recovery_timer, jiffies + (HZ * 5));
82
BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
83
smp_processor_id()));
84
return 1;
85
}
86
if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
87
88
valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;
89
90
for (i = 0; i < IIO_NUM_CRBS; i++) {
91
if (!((1 << i) & valid_crbs)) {
92
/* This crb was not marked as valid, ignore */
93
continue;
94
}
95
icrbd.ii_icrb0_d_regval =
96
REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
97
if (icrbd.d_bteop) {
98
mod_timer(recovery_timer, jiffies + (HZ * 5));
99
BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
100
err_nodepda, smp_processor_id(),
101
i));
102
return 1;
103
}
104
}
105
}
106
107
BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
108
/* Re-enable both bte interfaces */
109
imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
110
imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
111
REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
112
113
/* Clear BTE0/1 error bits */
114
ieclr.ii_ieclr_regval = 0;
115
if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
116
ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
117
if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
118
ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
119
REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);
120
121
/* Reinitialize both BTE state machines. */
122
ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
123
ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
124
REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
125
126
del_timer(recovery_timer);
127
return 0;
128
}
129
130
/*
131
* Wait until all BTE related CRBs are completed
132
* and then reset the interfaces.
133
*/
134
int shub2_bte_error_handler(unsigned long _nodepda)
135
{
136
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
137
struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
138
struct bteinfo_s *bte;
139
nasid_t nasid;
140
u64 status;
141
int i;
142
143
nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
144
145
/*
146
* Verify that all the BTEs are complete
147
*/
148
for (i = 0; i < BTES_PER_NODE; i++) {
149
bte = &err_nodepda->bte_if[i];
150
status = BTE_LNSTAT_LOAD(bte);
151
if (status & IBLS_ERROR) {
152
bte->bh_error = BTE_SHUB2_ERROR(status);
153
continue;
154
}
155
if (!(status & IBLS_BUSY))
156
continue;
157
mod_timer(recovery_timer, jiffies + (HZ * 5));
158
BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
159
smp_processor_id()));
160
return 1;
161
}
162
if (ia64_sn_bte_recovery(nasid))
163
panic("bte_error_handler(): Fatal BTE Error");
164
165
del_timer(recovery_timer);
166
return 0;
167
}
168
169
/*
170
* Wait until all BTE related CRBs are completed
171
* and then reset the interfaces.
172
*/
173
void bte_error_handler(unsigned long _nodepda)
174
{
175
struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
176
spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
177
int i;
178
unsigned long irq_flags;
179
volatile u64 *notify;
180
bte_result_t bh_error;
181
182
BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
183
smp_processor_id()));
184
185
spin_lock_irqsave(recovery_lock, irq_flags);
186
187
/*
188
* Lock all interfaces on this node to prevent new transfers
189
* from being queued.
190
*/
191
for (i = 0; i < BTES_PER_NODE; i++) {
192
if (err_nodepda->bte_if[i].cleanup_active) {
193
continue;
194
}
195
spin_lock(&err_nodepda->bte_if[i].spinlock);
196
BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
197
smp_processor_id(), i));
198
err_nodepda->bte_if[i].cleanup_active = 1;
199
}
200
201
if (is_shub1()) {
202
if (shub1_bte_error_handler(_nodepda)) {
203
spin_unlock_irqrestore(recovery_lock, irq_flags);
204
return;
205
}
206
} else {
207
if (shub2_bte_error_handler(_nodepda)) {
208
spin_unlock_irqrestore(recovery_lock, irq_flags);
209
return;
210
}
211
}
212
213
for (i = 0; i < BTES_PER_NODE; i++) {
214
bh_error = err_nodepda->bte_if[i].bh_error;
215
if (bh_error != BTE_SUCCESS) {
216
/* There is an error which needs to be notified */
217
notify = err_nodepda->bte_if[i].most_rcnt_na;
218
BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
219
err_nodepda->bte_if[i].bte_cnode,
220
err_nodepda->bte_if[i].bte_num,
221
IBLS_ERROR | (u64) bh_error));
222
*notify = IBLS_ERROR | bh_error;
223
err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
224
}
225
226
err_nodepda->bte_if[i].cleanup_active = 0;
227
BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
228
smp_processor_id(), i));
229
spin_unlock(&err_nodepda->bte_if[i].spinlock);
230
}
231
232
spin_unlock_irqrestore(recovery_lock, irq_flags);
233
}
234
235
/*
236
* First part error handler. This is called whenever any error CRB interrupt
237
* is generated by the II.
238
*/
239
void
240
bte_crb_error_handler(cnodeid_t cnode, int btenum,
241
int crbnum, ioerror_t * ioe, int bteop)
242
{
243
struct bteinfo_s *bte;
244
245
246
bte = &(NODEPDA(cnode)->bte_if[btenum]);
247
248
/*
249
* The caller has already figured out the error type, we save that
250
* in the bte handle structure for the thread exercising the
251
* interface to consume.
252
*/
253
bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
254
bte->bte_error_count++;
255
256
BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n",
257
bte->bte_cnode, bte->bte_num, ioe->ie_errortype));
258
bte_error_handler((unsigned long) NODEPDA(cnode));
259
}
260
261
262