GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/infiniband/hw/ipath/ipath_stats.c
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipath_kernel.h"

struct infinipath_stats ipath_stats;

/**
 * ipath_snap_cntr - snapshot a chip counter
 * @dd: the infinipath device
 * @creg: the counter to snapshot
 *
 * called from add_timer and user counter read calls, to deal with
 * counters that wrap in "human time".  The words sent and received, and
 * the packets sent and received are all that we worry about.  For now,
 * at least, we don't worry about error counters, because if they wrap
 * that quickly, we probably don't care.  We may eventually just make this
 * handle all the counters.  Word counters can wrap in about 20 seconds
 * of full bandwidth traffic, packet counters in a few hours.
 */

u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
{
        u32 val, reg64 = 0;
        u64 val64;
        unsigned long t0, t1;
        u64 ret;

        t0 = jiffies;
        /* If fast increment counters are only 32 bits, snapshot them,
         * and maintain them as 64bit values in the driver */
        if (!(dd->ipath_flags & IPATH_32BITCOUNTERS) &&
            (creg == dd->ipath_cregs->cr_wordsendcnt ||
             creg == dd->ipath_cregs->cr_wordrcvcnt ||
             creg == dd->ipath_cregs->cr_pktsendcnt ||
             creg == dd->ipath_cregs->cr_pktrcvcnt)) {
                val64 = ipath_read_creg(dd, creg);
                val = val64 == ~0ULL ? ~0U : 0;
                reg64 = 1;
        } else                  /* val64 just to keep gcc quiet... */
                val64 = val = ipath_read_creg32(dd, creg);
        /*
         * See if a second has passed.  This is just a way to detect things
         * that are quite broken.  Normally this should take just a few
         * cycles (the check is for long enough that we don't care if we get
         * pre-empted.)  An Opteron HT O read timeout is 4 seconds with
         * normal NB values
         */
        t1 = jiffies;
        if (time_before(t0 + HZ, t1) && val == -1) {
                ipath_dev_err(dd, "Error! Read counter 0x%x timed out\n",
                              creg);
                ret = 0ULL;
                goto bail;
        }
        if (reg64) {
                ret = val64;
                goto bail;
        }

        if (creg == dd->ipath_cregs->cr_wordsendcnt) {
                if (val != dd->ipath_lastsword) {
                        dd->ipath_sword += val - dd->ipath_lastsword;
                        dd->ipath_lastsword = val;
                }
                val64 = dd->ipath_sword;
        } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
                if (val != dd->ipath_lastrword) {
                        dd->ipath_rword += val - dd->ipath_lastrword;
                        dd->ipath_lastrword = val;
                }
                val64 = dd->ipath_rword;
        } else if (creg == dd->ipath_cregs->cr_pktsendcnt) {
                if (val != dd->ipath_lastspkts) {
                        dd->ipath_spkts += val - dd->ipath_lastspkts;
                        dd->ipath_lastspkts = val;
                }
                val64 = dd->ipath_spkts;
        } else if (creg == dd->ipath_cregs->cr_pktrcvcnt) {
                if (val != dd->ipath_lastrpkts) {
                        dd->ipath_rpkts += val - dd->ipath_lastrpkts;
                        dd->ipath_lastrpkts = val;
                }
                val64 = dd->ipath_rpkts;
        } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
                if (dd->ibdeltainprog)
                        val64 -= val64 - dd->ibsymsnap;
                val64 -= dd->ibsymdelta;
        } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
                if (dd->ibdeltainprog)
                        val64 -= val64 - dd->iblnkerrsnap;
                val64 -= dd->iblnkerrdelta;
        } else
                val64 = (u64) val;

        ret = val64;

bail:
        return ret;
}

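/*
 * Illustrative sketch (not part of the original driver): the wrap-safe
 * accumulation technique ipath_snap_cntr() uses above for the 32-bit
 * word and packet counters, shown as a standalone user-space snippet.
 * Because the delta is computed in unsigned 32-bit arithmetic, it is
 * correct even if the hardware counter wrapped once since the previous
 * snapshot.  The names counter_shadow and accumulate_counter are
 * hypothetical.
 */
#include <stdint.h>

struct counter_shadow {
        uint64_t sw_total;      /* 64-bit running total kept in software */
        uint32_t last_hw;       /* last raw 32-bit value read from hardware */
};

static uint64_t accumulate_counter(struct counter_shadow *c, uint32_t hw_now)
{
        /* (hw_now - last_hw) mod 2^32 is the true increment since last read */
        c->sw_total += (uint32_t)(hw_now - c->last_hw);
        c->last_hw = hw_now;
        return c->sw_total;
}
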
/**
 * ipath_qcheck - print delta of egrfull/hdrqfull errors for kernel ports
 * @dd: the infinipath device
 *
 * Print the delta of egrfull/hdrqfull errors for kernel ports no more than
 * every 5 seconds.  User processes are printed at close, but the kernel
 * doesn't close, so...  Separate routine so it may be called from other
 * places someday, and so the function name is meaningful when printed by
 * _IPATH_INFO.
 */
static void ipath_qcheck(struct ipath_devdata *dd)
{
        static u64 last_tot_hdrqfull;
        struct ipath_portdata *pd = dd->ipath_pd[0];
        size_t blen = 0;
        char buf[128];
        u32 hdrqtail;

        *buf = 0;
        if (pd->port_hdrqfull != dd->ipath_p0_hdrqfull) {
                blen = snprintf(buf, sizeof buf, "port 0 hdrqfull %u",
                                pd->port_hdrqfull -
                                dd->ipath_p0_hdrqfull);
                dd->ipath_p0_hdrqfull = pd->port_hdrqfull;
        }
        if (ipath_stats.sps_etidfull != dd->ipath_last_tidfull) {
                blen += snprintf(buf + blen, sizeof buf - blen,
                                 "%srcvegrfull %llu",
                                 blen ? ", " : "",
                                 (unsigned long long)
                                 (ipath_stats.sps_etidfull -
                                  dd->ipath_last_tidfull));
                dd->ipath_last_tidfull = ipath_stats.sps_etidfull;
        }

        /*
         * this is actually the number of hdrq full interrupts, not actual
         * events, but at the moment that's mostly what I'm interested in.
         * Actual count, etc. is in the counters, if needed.  For production
         * users this won't ordinarily be printed.
         */

        if ((ipath_debug & (__IPATH_PKTDBG | __IPATH_DBG)) &&
            ipath_stats.sps_hdrqfull != last_tot_hdrqfull) {
                blen += snprintf(buf + blen, sizeof buf - blen,
                                 "%shdrqfull %llu (all ports)",
                                 blen ? ", " : "",
                                 (unsigned long long)
                                 (ipath_stats.sps_hdrqfull -
                                  last_tot_hdrqfull));
                last_tot_hdrqfull = ipath_stats.sps_hdrqfull;
        }
        if (blen)
                ipath_dbg("%s\n", buf);

        hdrqtail = ipath_get_hdrqtail(pd);
        if (pd->port_head != hdrqtail) {
                if (dd->ipath_lastport0rcv_cnt ==
                    ipath_stats.sps_port0pkts) {
                        ipath_cdbg(PKT, "missing rcv interrupts? "
                                   "port0 hd=%x tl=%x; port0pkts %llx; write"
                                   " hd (w/intr)\n",
                                   pd->port_head, hdrqtail,
                                   (unsigned long long)
                                   ipath_stats.sps_port0pkts);
                        ipath_write_ureg(dd, ur_rcvhdrhead, hdrqtail |
                                dd->ipath_rhdrhead_intr_off, pd->port_port);
                }
                dd->ipath_lastport0rcv_cnt = ipath_stats.sps_port0pkts;
        }
}

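/*
 * Illustrative sketch (not part of the original driver): the bounded
 * snprintf-append idiom ipath_qcheck() uses above to build one debug
 * line from several optional pieces, inserting ", " only between pieces
 * that were actually emitted.  In the driver the buffer is sized
 * generously, so truncation is not expected; a stricter caller would
 * clamp blen to the buffer size.  build_report() and its parameters
 * are hypothetical.
 */
#include <stdio.h>

static void build_report(char *buf, size_t bufsz,
                         unsigned int hdrqfull_delta,
                         unsigned int egrfull_delta)
{
        size_t blen = 0;

        buf[0] = '\0';
        if (hdrqfull_delta)
                blen += snprintf(buf + blen, bufsz - blen,
                                 "port 0 hdrqfull %u", hdrqfull_delta);
        if (egrfull_delta)
                blen += snprintf(buf + blen, bufsz - blen,
                                 "%srcvegrfull %u",
                                 blen ? ", " : "", egrfull_delta);
}
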
static void ipath_chk_errormask(struct ipath_devdata *dd)
{
        static u32 fixed;
        u32 ctrl;
        unsigned long errormask;
        unsigned long hwerrs;

        if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
                return;

        errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);

        if (errormask == dd->ipath_errormask)
                return;
        fixed++;

        hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
        ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);

        ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
                         dd->ipath_errormask);

        if ((hwerrs & dd->ipath_hwerrmask) ||
            (ctrl & INFINIPATH_C_FREEZEMODE)) {
                /* force re-interrupt of pending events, just in case */
                ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
                ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
                dev_info(&dd->pcidev->dev,
                         "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
                         fixed, errormask, (unsigned long)dd->ipath_errormask,
                         ctrl, hwerrs);
        } else
                ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
                          fixed, errormask,
                          (unsigned long)dd->ipath_errormask);
}

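/*
 * Illustrative sketch (not part of the original driver): the
 * shadow-register repair pattern used by ipath_chk_errormask() above.
 * The driver keeps a software copy of what a control register should
 * contain, periodically reads the hardware value back, and rewrites
 * the register (counting the event) if the two have drifted apart.
 * shadowed_reg, read_reg and write_reg are hypothetical.
 */
#include <stdint.h>

struct shadowed_reg {
        uint64_t shadow;        /* value the driver believes is programmed */
        unsigned int fixups;    /* how many times it had to be restored */
};

static void check_shadowed_reg(struct shadowed_reg *r,
                               uint64_t (*read_reg)(void),
                               void (*write_reg)(uint64_t))
{
        uint64_t hw = read_reg();

        if (hw == r->shadow)
                return;                 /* hardware still matches the shadow */
        r->fixups++;
        write_reg(r->shadow);           /* restore the intended value */
}
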
/**
 * ipath_get_faststats - get word counters from chip before they overflow
 * @opaque: contains a pointer to the infinipath device ipath_devdata
 *
 * called from add_timer
 */
void ipath_get_faststats(unsigned long opaque)
{
        struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
        int i;
        static unsigned cnt;
        unsigned long flags;
        u64 traffic_wds;

        /*
         * don't access the chip while running diags, or memory diags can
         * fail
         */
        if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) ||
            ipath_diag_inuse)
                /* but re-arm the timer, for the diags case; harmless otherwise */
                goto done;

        /*
         * We now try to maintain an "active timer", based on traffic
         * exceeding a threshold, so we need to check the word-counts
         * even if they are 64-bit.
         */
        traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
        spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
        traffic_wds -= dd->ipath_traffic_wds;
        dd->ipath_traffic_wds += traffic_wds;
        if (traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
                atomic_add(5, &dd->ipath_active_time); /* S/B #define */
        spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);

        if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
                ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
        }

        ipath_qcheck(dd);

        /*
         * deal with repeat error suppression.  Doesn't really matter if
         * the last error was almost a full interval ago, or just a few usecs
         * ago; still won't get more than 2 per interval.  We may want
         * longer intervals for this eventually, could do with mod, counter
         * or separate timer.  Also see code in ipath_handle_errors() and
         * ipath_handle_hwerrors().
         */

        if (dd->ipath_lasterror)
                dd->ipath_lasterror = 0;
        if (dd->ipath_lasthwerror)
                dd->ipath_lasthwerror = 0;
        if (dd->ipath_maskederrs
            && time_after(jiffies, dd->ipath_unmasktime)) {
                char ebuf[256];
                int iserr;
                iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
                                         dd->ipath_maskederrs);
                if (dd->ipath_maskederrs &
                    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
                      INFINIPATH_E_PKTERRS))
                        ipath_dev_err(dd, "Re-enabling masked errors "
                                      "(%s)\n", ebuf);
                else {
                        /*
                         * rcvegrfull and rcvhdrqfull are "normal", for some
                         * types of processes (mostly benchmarks) that send
                         * huge numbers of messages, while not processing
                         * them.  So only complain about these at debug
                         * level.
                         */
                        if (iserr)
                                ipath_dbg(
                                        "Re-enabling queue full errors (%s)\n",
                                        ebuf);
                        else
                                ipath_cdbg(ERRPKT, "Re-enabling packet"
                                           " problem interrupt (%s)\n", ebuf);
                }

                /* re-enable masked errors */
                dd->ipath_errormask |= dd->ipath_maskederrs;
                ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
                                 dd->ipath_errormask);
                dd->ipath_maskederrs = 0;
        }

        /* limit qfull messages to ~one per minute per port */
        if ((++cnt & 0x10)) {
                for (i = (int) dd->ipath_cfgports; --i >= 0; ) {
                        struct ipath_portdata *pd = dd->ipath_pd[i];

                        if (pd && pd->port_lastrcvhdrqtail != -1)
                                pd->port_lastrcvhdrqtail = -1;
                }
        }

        ipath_chk_errormask(dd);
done:
        mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}

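/*
 * Illustrative sketch (not part of the original driver): how a
 * self-rearming statistics timer of this kind is typically registered
 * with the legacy (pre-4.15) kernel timer API that this file targets.
 * The callback receives an opaque unsigned long, casts it back to the
 * device structure, does its periodic work, and re-arms itself with
 * mod_timer(), just as ipath_get_faststats() does above.
 * my_devdata, my_faststats and my_start_stats_timer are hypothetical.
 */
#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_devdata {
        struct timer_list stats_timer;
};

static void my_faststats(unsigned long opaque)
{
        struct my_devdata *dd = (struct my_devdata *)opaque;

        /* ... snapshot counters, check error masks, etc. ... */

        /* run again in 5 seconds */
        mod_timer(&dd->stats_timer, jiffies + HZ * 5);
}

static void my_start_stats_timer(struct my_devdata *dd)
{
        setup_timer(&dd->stats_timer, my_faststats, (unsigned long)dd);
        mod_timer(&dd->stats_timer, jiffies + HZ * 5);
}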