Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/dev/bnxt/bnxt_re/main.c
39566 views
1
/*
2
* Copyright (c) 2015-2024, Broadcom. All rights reserved. The term
3
* Broadcom refers to Broadcom Limited and/or its subsidiaries.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
7
* are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in
13
* the documentation and/or other materials provided with the
14
* distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
17
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
20
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*
28
* Description: Main component of the bnxt_re driver
29
*/
30
31
#include <linux/if_ether.h>
32
#include <linux/module.h>
33
#include <linux/errno.h>
34
#include <linux/pci.h>
35
#include <linux/dma-mapping.h>
36
#include <linux/slab.h>
37
#include <linux/sched.h>
38
#include <linux/delay.h>
39
#include <linux/fs.h>
40
#include <rdma/ib_user_verbs.h>
41
#include <rdma/ib_addr.h>
42
#include <rdma/ib_cache.h>
43
#include <dev/mlx5/port.h>
44
#include <dev/mlx5/vport.h>
45
#include <linux/list.h>
46
#include <rdma/ib_smi.h>
47
#include <rdma/ib_umem.h>
48
#include <linux/in.h>
49
#include <linux/etherdevice.h>
50
51
#include "bnxt_re.h"
52
#include "ib_verbs.h"
53
#include "bnxt_re-abi.h"
54
#include "bnxt.h"
55
56
static char drv_version[] =
57
"Broadcom NetXtreme-C/E RoCE Driver " ROCE_DRV_MODULE_NAME \
58
" v" ROCE_DRV_MODULE_VERSION " (" ROCE_DRV_MODULE_RELDATE ")\n";
59
60
#define BNXT_RE_DESC "Broadcom NetXtreme RoCE"
61
#define BNXT_ADEV_NAME "if_bnxt"
62
63
MODULE_DESCRIPTION("Broadcom NetXtreme-C/E RoCE Driver");
64
MODULE_LICENSE("Dual BSD/GPL");
65
MODULE_DEPEND(bnxt_re, linuxkpi, 1, 1, 1);
66
MODULE_DEPEND(bnxt_re, ibcore, 1, 1, 1);
67
MODULE_DEPEND(bnxt_re, if_bnxt, 1, 1, 1);
68
MODULE_VERSION(bnxt_re, 1);
69
70
71
DEFINE_MUTEX(bnxt_re_mutex); /* mutex lock for driver */
72
73
static unsigned int restrict_mrs = 0;
74
module_param(restrict_mrs, uint, 0);
75
MODULE_PARM_DESC(restrict_mrs, " Restrict the no. of MRs 0 = 256K , 1 = 64K");
76
77
unsigned int restrict_stats = 0;
78
module_param(restrict_stats, uint, 0);
79
MODULE_PARM_DESC(restrict_stats, "Restrict stats query frequency to ethtool coalesce value. Disabled by default");
80
81
unsigned int enable_fc = 1;
82
module_param(enable_fc, uint, 0);
83
MODULE_PARM_DESC(enable_fc, "Enable default PFC, CC,ETS during driver load. 1 - fc enable, 0 - fc disable - Default is 1");
84
85
unsigned int min_tx_depth = 1;
86
module_param(min_tx_depth, uint, 0);
87
MODULE_PARM_DESC(min_tx_depth, "Minimum TX depth - Default is 1");
88
89
static uint8_t max_msix_vec[BNXT_RE_MAX_DEVICES] = {0};
90
static unsigned int max_msix_vec_argc;
91
module_param_array(max_msix_vec, byte, &max_msix_vec_argc, 0444);
92
MODULE_PARM_DESC(max_msix_vec, "Max MSI-x vectors per PF (2 - 64) - Default is 64");
93
94
unsigned int cmdq_shadow_qd = RCFW_CMD_NON_BLOCKING_SHADOW_QD;
95
module_param_named(cmdq_shadow_qd, cmdq_shadow_qd, uint, 0644);
96
MODULE_PARM_DESC(cmdq_shadow_qd, "Perf Stat Debug: Shadow QD Range (1-64) - Default is 64");
97
98
99
/* globals */
100
struct list_head bnxt_re_dev_list = LINUX_LIST_HEAD_INIT(bnxt_re_dev_list);
101
static int bnxt_re_probe_count;
102
103
DEFINE_MUTEX(bnxt_re_dev_lock);
104
static u32 gmod_exit;
105
static u32 gadd_dev_inprogress;
106
107
static void bnxt_re_task(struct work_struct *work_task);
108
static struct workqueue_struct *bnxt_re_wq;
109
static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev);
110
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
111
u32 *offset);
112
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev);
113
static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev);
114
void _bnxt_re_remove(struct auxiliary_device *adev);
115
void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32);
116
u32 readl_fbsd(struct bnxt_softc *bp, u32, u8);
117
static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev);
118
119
int bnxt_re_register_netdevice_notifier(struct notifier_block *nb)
120
{
121
int rc;
122
rc = register_netdevice_notifier(nb);
123
return rc;
124
}
125
126
int bnxt_re_unregister_netdevice_notifier(struct notifier_block *nb)
127
{
128
int rc;
129
rc = unregister_netdevice_notifier(nb);
130
return rc;
131
}
132
133
void bnxt_re_set_dma_device(struct ib_device *ibdev, struct bnxt_re_dev *rdev)
134
{
135
ibdev->dma_device = &rdev->en_dev->pdev->dev;
136
}
137
138
void bnxt_re_init_resolve_wq(struct bnxt_re_dev *rdev)
139
{
140
rdev->resolve_wq = create_singlethread_workqueue("bnxt_re_resolve_wq");
141
INIT_LIST_HEAD(&rdev->mac_wq_list);
142
}
143
144
void bnxt_re_uninit_resolve_wq(struct bnxt_re_dev *rdev)
145
{
146
struct bnxt_re_resolve_dmac_work *tmp_work = NULL, *tmp_st;
147
if (!rdev->resolve_wq)
148
return;
149
flush_workqueue(rdev->resolve_wq);
150
list_for_each_entry_safe(tmp_work, tmp_st, &rdev->mac_wq_list, list) {
151
list_del(&tmp_work->list);
152
kfree(tmp_work);
153
}
154
destroy_workqueue(rdev->resolve_wq);
155
rdev->resolve_wq = NULL;
156
}
157
158
u32 readl_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx)
159
{
160
161
if (bar_idx)
162
return bus_space_read_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off);
163
else
164
return bus_space_read_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off);
165
}
166
167
void writel_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx, u32 val)
168
{
169
if (bar_idx)
170
bus_space_write_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off, htole32(val));
171
else
172
bus_space_write_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off, htole32(val));
173
}
174
175
static void bnxt_re_update_fifo_occup_slabs(struct bnxt_re_dev *rdev,
176
u32 fifo_occup)
177
{
178
if (fifo_occup > rdev->dbg_stats->dbq.fifo_occup_water_mark)
179
rdev->dbg_stats->dbq.fifo_occup_water_mark = fifo_occup;
180
181
if (fifo_occup > 8 * rdev->pacing_algo_th)
182
rdev->dbg_stats->dbq.fifo_occup_slab_4++;
183
else if (fifo_occup > 4 * rdev->pacing_algo_th)
184
rdev->dbg_stats->dbq.fifo_occup_slab_3++;
185
else if (fifo_occup > 2 * rdev->pacing_algo_th)
186
rdev->dbg_stats->dbq.fifo_occup_slab_2++;
187
else if (fifo_occup > rdev->pacing_algo_th)
188
rdev->dbg_stats->dbq.fifo_occup_slab_1++;
189
}
190
191
static void bnxt_re_update_do_pacing_slabs(struct bnxt_re_dev *rdev)
192
{
193
struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
194
195
if (pacing_data->do_pacing > rdev->dbg_stats->dbq.do_pacing_water_mark)
196
rdev->dbg_stats->dbq.do_pacing_water_mark = pacing_data->do_pacing;
197
198
if (pacing_data->do_pacing > 16 * rdev->dbr_def_do_pacing)
199
rdev->dbg_stats->dbq.do_pacing_slab_5++;
200
else if (pacing_data->do_pacing > 8 * rdev->dbr_def_do_pacing)
201
rdev->dbg_stats->dbq.do_pacing_slab_4++;
202
else if (pacing_data->do_pacing > 4 * rdev->dbr_def_do_pacing)
203
rdev->dbg_stats->dbq.do_pacing_slab_3++;
204
else if (pacing_data->do_pacing > 2 * rdev->dbr_def_do_pacing)
205
rdev->dbg_stats->dbq.do_pacing_slab_2++;
206
else if (pacing_data->do_pacing > rdev->dbr_def_do_pacing)
207
rdev->dbg_stats->dbq.do_pacing_slab_1++;
208
}
209
210
static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
211
{
212
return qp->ib_qp.qp_type == IB_QPT_GSI;
213
}
214
215
static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
216
{
217
struct bnxt_re_qp *qp;
218
219
mutex_lock(&rdev->qp_lock);
220
list_for_each_entry(qp, &rdev->qp_list, list) {
221
if (bnxt_re_is_qp1_qp(qp)) {
222
mutex_unlock(&rdev->qp_lock);
223
return qp;
224
}
225
}
226
mutex_unlock(&rdev->qp_lock);
227
return NULL;
228
}
229
230
/* Set the maximum number of each resource that the driver actually wants
231
* to allocate. This may be up to the maximum number the firmware has
232
* reserved for the function. The driver may choose to allocate fewer
233
* resources than the firmware maximum.
234
*/
235
static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
236
{
237
struct bnxt_qplib_max_res dev_res = {};
238
struct bnxt_qplib_chip_ctx *cctx;
239
struct bnxt_qplib_dev_attr *attr;
240
struct bnxt_qplib_ctx *hctx;
241
int i;
242
243
attr = rdev->dev_attr;
244
hctx = rdev->qplib_res.hctx;
245
cctx = rdev->chip_ctx;
246
247
bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, false);
248
if (!_is_chip_gen_p5_p7(cctx)) {
249
hctx->qp_ctx.max = min_t(u32, dev_res.max_qp, attr->max_qp);
250
hctx->mrw_ctx.max = min_t(u32, dev_res.max_mr, attr->max_mr);
251
/* To accommodate 16k MRs and 16k AHs,
252
* driver has to allocate 32k backing store memory
253
*/
254
hctx->mrw_ctx.max *= 2;
255
hctx->srq_ctx.max = min_t(u32, dev_res.max_srq, attr->max_srq);
256
hctx->cq_ctx.max = min_t(u32, dev_res.max_cq, attr->max_cq);
257
for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
258
hctx->tqm_ctx.qcount[i] = attr->tqm_alloc_reqs[i];
259
} else {
260
hctx->qp_ctx.max = attr->max_qp ? attr->max_qp : dev_res.max_qp;
261
hctx->mrw_ctx.max = attr->max_mr ? attr->max_mr : dev_res.max_mr;
262
hctx->srq_ctx.max = attr->max_srq ? attr->max_srq : dev_res.max_srq;
263
hctx->cq_ctx.max = attr->max_cq ? attr->max_cq : dev_res.max_cq;
264
}
265
}
266
267
static void bnxt_re_limit_vf_res(struct bnxt_re_dev *rdev,
268
struct bnxt_qplib_vf_res *vf_res,
269
u32 num_vf)
270
{
271
struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx;
272
struct bnxt_qplib_max_res dev_res = {};
273
274
bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, true);
275
vf_res->max_qp = dev_res.max_qp / num_vf;
276
vf_res->max_srq = dev_res.max_srq / num_vf;
277
vf_res->max_cq = dev_res.max_cq / num_vf;
278
/*
279
* MR and AH shares the same backing store, the value specified
280
* for max_mrw is split into half by the FW for MR and AH
281
*/
282
vf_res->max_mrw = dev_res.max_mr * 2 / num_vf;
283
vf_res->max_gid = BNXT_RE_MAX_GID_PER_VF;
284
}
285
286
static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
287
{
288
struct bnxt_qplib_ctx *hctx;
289
290
hctx = rdev->qplib_res.hctx;
291
memset(&hctx->vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
292
bnxt_re_limit_pf_res(rdev);
293
294
if (rdev->num_vfs)
295
bnxt_re_limit_vf_res(rdev, &hctx->vf_res, rdev->num_vfs);
296
}
297
298
static void bnxt_re_dettach_irq(struct bnxt_re_dev *rdev)
299
{
300
struct bnxt_qplib_rcfw *rcfw = NULL;
301
struct bnxt_qplib_nq *nq;
302
int indx;
303
304
rcfw = &rdev->rcfw;
305
for (indx = 0; indx < rdev->nqr.max_init; indx++) {
306
nq = &rdev->nqr.nq[indx];
307
mutex_lock(&nq->lock);
308
bnxt_qplib_nq_stop_irq(nq, false);
309
mutex_unlock(&nq->lock);
310
}
311
312
bnxt_qplib_rcfw_stop_irq(rcfw, false);
313
}
314
315
static void bnxt_re_detach_err_device(struct bnxt_re_dev *rdev)
316
{
317
/* Free the MSIx vectors only so that L2 can proceed with MSIx disable */
318
bnxt_re_dettach_irq(rdev);
319
320
/* Set the state as detached to prevent sending any more commands */
321
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
322
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
323
wake_up_all(&rdev->rcfw.cmdq.waitq);
324
}
325
326
#define MAX_DSCP_PRI_TUPLE 64
327
328
struct bnxt_re_dcb_work {
329
struct work_struct work;
330
struct bnxt_re_dev *rdev;
331
struct hwrm_async_event_cmpl cmpl;
332
};
333
334
static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
335
{
336
rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
337
}
338
339
static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
340
{
341
if (!rdev->dcb_wq)
342
return;
343
flush_workqueue(rdev->dcb_wq);
344
destroy_workqueue(rdev->dcb_wq);
345
rdev->dcb_wq = NULL;
346
}
347
348
static void bnxt_re_init_aer_wq(struct bnxt_re_dev *rdev)
349
{
350
rdev->aer_wq = create_singlethread_workqueue("bnxt_re_aer_wq");
351
}
352
353
static void bnxt_re_uninit_aer_wq(struct bnxt_re_dev *rdev)
354
{
355
if (!rdev->aer_wq)
356
return;
357
flush_workqueue(rdev->aer_wq);
358
destroy_workqueue(rdev->aer_wq);
359
rdev->aer_wq = NULL;
360
}
361
362
static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
363
{
364
struct bnxt_re_qp *qp;
365
366
if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
367
return 0;
368
369
qp = bnxt_re_get_qp1_qp(rdev);
370
if (!qp)
371
return 0;
372
373
qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
374
qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
375
376
return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
377
}
378
379
static void bnxt_re_reconfigure_dscp(struct bnxt_re_dev *rdev)
380
{
381
struct bnxt_qplib_cc_param *cc_param;
382
struct bnxt_re_tc_rec *tc_rec;
383
bool update_cc = false;
384
u8 dscp_user;
385
int rc;
386
387
cc_param = &rdev->cc_param;
388
tc_rec = &rdev->tc_rec[0];
389
390
if (!(cc_param->roce_dscp_user || cc_param->cnp_dscp_user))
391
return;
392
393
if (cc_param->cnp_dscp_user) {
394
dscp_user = (cc_param->cnp_dscp_user & 0x3f);
395
if ((tc_rec->cnp_dscp_bv & (1ul << dscp_user)) &&
396
(cc_param->alt_tos_dscp != dscp_user)) {
397
cc_param->alt_tos_dscp = dscp_user;
398
cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP;
399
update_cc = true;
400
}
401
}
402
403
if (cc_param->roce_dscp_user) {
404
dscp_user = (cc_param->roce_dscp_user & 0x3f);
405
if ((tc_rec->roce_dscp_bv & (1ul << dscp_user)) &&
406
(cc_param->tos_dscp != dscp_user)) {
407
cc_param->tos_dscp = dscp_user;
408
cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
409
update_cc = true;
410
}
411
}
412
413
if (update_cc) {
414
rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param);
415
if (rc)
416
dev_err(rdev_to_dev(rdev), "Failed to apply cc settings\n");
417
}
418
}
419
420
static void bnxt_re_dcb_wq_task(struct work_struct *work)
421
{
422
struct bnxt_qplib_cc_param *cc_param;
423
struct bnxt_re_tc_rec *tc_rec;
424
struct bnxt_re_dev *rdev;
425
struct bnxt_re_dcb_work *dcb_work =
426
container_of(work, struct bnxt_re_dcb_work, work);
427
int rc;
428
429
rdev = dcb_work->rdev;
430
if (!rdev)
431
goto exit;
432
433
mutex_lock(&rdev->cc_lock);
434
435
cc_param = &rdev->cc_param;
436
rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
437
if (rc) {
438
dev_err(rdev_to_dev(rdev), "Failed to query ccparam rc:%d", rc);
439
goto fail;
440
}
441
tc_rec = &rdev->tc_rec[0];
442
/*
443
* Upon the receival of DCB Async event:
444
* If roce_dscp or cnp_dscp or both (which user configured using configfs)
445
* is in the list, re-program the value using modify_roce_cc command
446
*/
447
bnxt_re_reconfigure_dscp(rdev);
448
449
cc_param->roce_pri = tc_rec->roce_prio;
450
if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
451
cc_param->qp1_tos_dscp = cc_param->tos_dscp;
452
rc = bnxt_re_update_qp1_tos_dscp(rdev);
453
if (rc) {
454
dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1 rc:%d",
455
__func__, rc);
456
goto fail;
457
}
458
}
459
460
fail:
461
mutex_unlock(&rdev->cc_lock);
462
exit:
463
kfree(dcb_work);
464
}
465
466
static int bnxt_re_hwrm_dbr_pacing_broadcast_event(struct bnxt_re_dev *rdev)
467
{
468
struct hwrm_func_dbr_pacing_broadcast_event_output resp = {0};
469
struct hwrm_func_dbr_pacing_broadcast_event_input req = {0};
470
struct bnxt_en_dev *en_dev = rdev->en_dev;
471
struct bnxt_fw_msg fw_msg;
472
int rc;
473
474
memset(&fw_msg, 0, sizeof(fw_msg));
475
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
476
HWRM_FUNC_DBR_PACING_BROADCAST_EVENT, -1, -1);
477
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
478
sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
479
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
480
if (rc) {
481
dev_dbg(rdev_to_dev(rdev),
482
"Failed to send dbr pacing broadcast event rc:%d", rc);
483
return rc;
484
}
485
return 0;
486
}
487
488
static int bnxt_re_hwrm_dbr_pacing_nqlist_query(struct bnxt_re_dev *rdev)
489
{
490
struct hwrm_func_dbr_pacing_nqlist_query_output resp = {0};
491
struct hwrm_func_dbr_pacing_nqlist_query_input req = {0};
492
struct bnxt_dbq_nq_list *nq_list = &rdev->nq_list;
493
struct bnxt_en_dev *en_dev = rdev->en_dev;
494
bool primary_found = false;
495
struct bnxt_fw_msg fw_msg;
496
struct bnxt_qplib_nq *nq;
497
int rc, i, j = 1;
498
u16 *nql_ptr;
499
500
nq = &rdev->nqr.nq[0];
501
502
memset(&fw_msg, 0, sizeof(fw_msg));
503
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
504
HWRM_FUNC_DBR_PACING_NQLIST_QUERY, -1, -1);
505
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
506
sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
507
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
508
if (rc) {
509
dev_err(rdev_to_dev(rdev), "Failed to send dbr pacing nq list query rc:%d", rc);
510
return rc;
511
}
512
nq_list->num_nql_entries = le32_to_cpu(resp.num_nqs);
513
nql_ptr = &resp.nq_ring_id0;
514
/* populate the nq_list of the primary function with list received
515
* from FW. Fill the NQ IDs of secondary functions from index 1 to
516
* num_nql_entries - 1. Fill the nq_list->nq_id[0] with the
517
* nq_id of the primary pf
518
*/
519
for (i = 0; i < nq_list->num_nql_entries; i++) {
520
u16 nq_id = *nql_ptr;
521
522
dev_dbg(rdev_to_dev(rdev),
523
"nq_list->nq_id[%d] = %d\n", i, nq_id);
524
if (nq_id != nq->ring_id) {
525
nq_list->nq_id[j] = nq_id;
526
j++;
527
} else {
528
primary_found = true;
529
nq_list->nq_id[0] = nq->ring_id;
530
}
531
nql_ptr++;
532
}
533
if (primary_found)
534
bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1);
535
536
return 0;
537
}
538
539
static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
540
{
541
struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
542
u32 read_val, fifo_occup;
543
bool first_read = true;
544
545
/* loop shouldn't run infintely as the occupancy usually goes
546
* below pacing algo threshold as soon as pacing kicks in.
547
*/
548
while (1) {
549
read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_db_fifo_reg_off, 0);
550
fifo_occup = pacing_data->fifo_max_depth -
551
((read_val & pacing_data->fifo_room_mask) >>
552
pacing_data->fifo_room_shift);
553
/* Fifo occupancy cannot be greater the MAX FIFO depth */
554
if (fifo_occup > pacing_data->fifo_max_depth)
555
break;
556
557
if (first_read) {
558
bnxt_re_update_fifo_occup_slabs(rdev, fifo_occup);
559
first_read = false;
560
}
561
if (fifo_occup < pacing_data->pacing_th)
562
break;
563
}
564
}
565
566
static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
567
{
568
struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
569
570
pacing_data->do_pacing = rdev->dbr_def_do_pacing;
571
pacing_data->pacing_th = rdev->pacing_algo_th;
572
pacing_data->alarm_th =
573
pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx);
574
}
575
576
#define CAG_RING_MASK 0x7FF
577
#define CAG_RING_SHIFT 17
578
#define WATERMARK_MASK 0xFFF
579
#define WATERMARK_SHIFT 0
580
581
static bool bnxt_re_check_if_dbq_intr_triggered(struct bnxt_re_dev *rdev)
582
{
583
u32 read_val;
584
int j;
585
586
for (j = 0; j < 10; j++) {
587
read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0);
588
dev_dbg(rdev_to_dev(rdev), "AEQ ARM status = 0x%x\n",
589
read_val);
590
if (!read_val)
591
return true;
592
}
593
return false;
594
}
595
596
int bnxt_re_set_dbq_throttling_reg(struct bnxt_re_dev *rdev, u16 nq_id, u32 throttle)
597
{
598
u32 cag_ring_water_mark = 0, read_val;
599
u32 throttle_val;
600
601
/* Convert throttle percentage to value */
602
throttle_val = (rdev->qplib_res.pacing_data->fifo_max_depth * throttle) / 100;
603
604
if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
605
cag_ring_water_mark = (nq_id & CAG_RING_MASK) << CAG_RING_SHIFT |
606
(throttle_val & WATERMARK_MASK);
607
writel_fbsd(rdev->en_dev->softc, rdev->dbr_throttling_reg_off, 0, cag_ring_water_mark);
608
read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_throttling_reg_off, 0);
609
dev_dbg(rdev_to_dev(rdev),
610
"%s: dbr_throttling_reg_off read_val = 0x%x\n",
611
__func__, read_val);
612
if (read_val != cag_ring_water_mark) {
613
dev_dbg(rdev_to_dev(rdev),
614
"nq_id = %d write_val=0x%x read_val=0x%x\n",
615
nq_id, cag_ring_water_mark, read_val);
616
return 1;
617
}
618
}
619
writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0, 1);
620
return 0;
621
}
622
623
static void bnxt_re_set_dbq_throttling_for_non_primary(struct bnxt_re_dev *rdev)
624
{
625
struct bnxt_dbq_nq_list *nq_list;
626
struct bnxt_qplib_nq *nq;
627
int i;
628
629
nq_list = &rdev->nq_list;
630
/* Run a loop for other Active functions if this is primary function */
631
if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
632
dev_dbg(rdev_to_dev(rdev), "%s: nq_list->num_nql_entries= %d\n",
633
__func__, nq_list->num_nql_entries);
634
nq = &rdev->nqr.nq[0];
635
for (i = nq_list->num_nql_entries - 1; i > 0; i--) {
636
u16 nq_id = nq_list->nq_id[i];
637
if (nq)
638
dev_dbg(rdev_to_dev(rdev),
639
"%s: nq_id = %d cur_fn_ring_id = %d\n",
640
__func__, nq_id, nq->ring_id);
641
if (bnxt_re_set_dbq_throttling_reg
642
(rdev, nq_id, 0))
643
break;
644
bnxt_re_check_if_dbq_intr_triggered(rdev);
645
}
646
}
647
}
648
649
static void bnxt_re_handle_dbr_nq_pacing_notification(struct bnxt_re_dev *rdev)
650
{
651
struct bnxt_qplib_nq *nq;
652
int rc = 0;
653
654
nq = &rdev->nqr.nq[0];
655
656
/* Query the NQ list*/
657
rc = bnxt_re_hwrm_dbr_pacing_nqlist_query(rdev);
658
if (rc) {
659
dev_err(rdev_to_dev(rdev),
660
"Failed to Query NQ list rc= %d", rc);
661
return;
662
}
663
/*Configure GRC access for Throttling and aeq_arm register */
664
writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28, 0,
665
rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK);
666
667
rdev->dbr_throttling_reg_off =
668
(rdev->chip_ctx->dbr_throttling_reg &
669
BNXT_GRC_OFFSET_MASK) + 0x8000;
670
rdev->dbr_aeq_arm_reg_off =
671
(rdev->chip_ctx->dbr_aeq_arm_reg &
672
BNXT_GRC_OFFSET_MASK) + 0x8000;
673
674
bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, rdev->dbq_watermark);
675
}
676
677
static void bnxt_re_dbq_wq_task(struct work_struct *work)
678
{
679
struct bnxt_re_dbq_work *dbq_work =
680
container_of(work, struct bnxt_re_dbq_work, work);
681
struct bnxt_re_dev *rdev;
682
683
rdev = dbq_work->rdev;
684
685
if (!rdev)
686
goto exit;
687
switch (dbq_work->event) {
688
case BNXT_RE_DBQ_EVENT_SCHED:
689
dev_dbg(rdev_to_dev(rdev), "%s: Handle DBQ Pacing event\n",
690
__func__);
691
if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx))
692
bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev);
693
else
694
bnxt_re_pacing_alert(rdev);
695
break;
696
case BNXT_RE_DBR_PACING_EVENT:
697
dev_dbg(rdev_to_dev(rdev), "%s: Sched interrupt/pacing worker\n",
698
__func__);
699
if (_is_chip_p7(rdev->chip_ctx))
700
bnxt_re_pacing_alert(rdev);
701
else if (!rdev->chip_ctx->modes.dbr_pacing_v0)
702
bnxt_re_hwrm_dbr_pacing_qcfg(rdev);
703
break;
704
case BNXT_RE_DBR_NQ_PACING_NOTIFICATION:
705
bnxt_re_handle_dbr_nq_pacing_notification(rdev);
706
/* Issue a broadcast event to notify other functions
707
* that primary changed
708
*/
709
bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev);
710
break;
711
}
712
exit:
713
kfree(dbq_work);
714
}
715
716
static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
717
{
718
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
719
struct bnxt_re_dcb_work *dcb_work;
720
struct bnxt_re_dbq_work *dbq_work;
721
struct bnxt_re_dev *rdev;
722
u16 event_id;
723
u32 data1;
724
u32 data2 = 0;
725
726
if (!cmpl) {
727
pr_err("Async event, bad completion\n");
728
return;
729
}
730
731
if (!en_info || !en_info->en_dev) {
732
pr_err("Async event, bad en_info or en_dev\n");
733
return;
734
}
735
rdev = en_info->rdev;
736
737
event_id = le16_to_cpu(cmpl->event_id);
738
data1 = le32_to_cpu(cmpl->event_data1);
739
data2 = le32_to_cpu(cmpl->event_data2);
740
741
if (!rdev || !rdev_to_dev(rdev)) {
742
dev_dbg(NULL, "Async event, bad rdev or netdev\n");
743
return;
744
}
745
746
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags) ||
747
!test_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
748
dev_dbg(NULL, "Async event, device already detached\n");
749
return;
750
}
751
if (data2 >= 0)
752
dev_dbg(rdev_to_dev(rdev), "Async event_id = %d data1 = %d data2 = %d",
753
event_id, data1, data2);
754
755
switch (event_id) {
756
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
757
/* Not handling the event in older FWs */
758
if (!is_qport_service_type_supported(rdev))
759
break;
760
if (!rdev->dcb_wq)
761
break;
762
dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
763
if (!dcb_work)
764
break;
765
766
dcb_work->rdev = rdev;
767
memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
768
INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
769
queue_work(rdev->dcb_wq, &dcb_work->work);
770
break;
771
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
772
if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) {
773
/* Set rcfw flag to control commands send to Bono */
774
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
775
/* Set bnxt_re flag to control commands send via L2 driver */
776
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
777
wake_up_all(&rdev->rcfw.cmdq.waitq);
778
}
779
break;
780
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD:
781
if (!rdev->dbr_pacing)
782
break;
783
dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
784
if (!dbq_work)
785
goto unlock;
786
dbq_work->rdev = rdev;
787
dbq_work->event = BNXT_RE_DBR_PACING_EVENT;
788
INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
789
queue_work(rdev->dbq_wq, &dbq_work->work);
790
rdev->dbr_sw_stats->dbq_int_recv++;
791
break;
792
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE:
793
if (!rdev->dbr_pacing)
794
break;
795
796
dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
797
if (!dbq_work)
798
goto unlock;
799
dbq_work->rdev = rdev;
800
dbq_work->event = BNXT_RE_DBR_NQ_PACING_NOTIFICATION;
801
INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
802
queue_work(rdev->dbq_wq, &dbq_work->work);
803
break;
804
805
default:
806
break;
807
}
808
unlock:
809
return;
810
}
811
812
static void bnxt_re_db_fifo_check(struct work_struct *work)
813
{
814
struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
815
dbq_fifo_check_work);
816
struct bnxt_qplib_db_pacing_data *pacing_data;
817
u32 pacing_save;
818
819
if (!mutex_trylock(&rdev->dbq_lock))
820
return;
821
pacing_data = rdev->qplib_res.pacing_data;
822
pacing_save = rdev->do_pacing_save;
823
__wait_for_fifo_occupancy_below_th(rdev);
824
cancel_delayed_work_sync(&rdev->dbq_pacing_work);
825
if (rdev->dbr_recovery_on)
826
goto recovery_on;
827
if (pacing_save > rdev->dbr_def_do_pacing) {
828
/* Double the do_pacing value during the congestion */
829
pacing_save = pacing_save << 1;
830
} else {
831
/*
832
* when a new congestion is detected increase the do_pacing
833
* by 8 times. And also increase the pacing_th by 4 times. The
834
* reason to increase pacing_th is to give more space for the
835
* queue to oscillate down without getting empty, but also more
836
* room for the queue to increase without causing another alarm.
837
*/
838
pacing_save = pacing_save << 3;
839
pacing_data->pacing_th = rdev->pacing_algo_th * 4;
840
}
841
842
if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
843
pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
844
845
pacing_data->do_pacing = pacing_save;
846
rdev->do_pacing_save = pacing_data->do_pacing;
847
pacing_data->alarm_th =
848
pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx);
849
recovery_on:
850
schedule_delayed_work(&rdev->dbq_pacing_work,
851
msecs_to_jiffies(rdev->dbq_pacing_time));
852
rdev->dbr_sw_stats->dbq_pacing_alerts++;
853
mutex_unlock(&rdev->dbq_lock);
854
}
855
856
static void bnxt_re_pacing_timer_exp(struct work_struct *work)
857
{
858
struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
859
dbq_pacing_work.work);
860
struct bnxt_qplib_db_pacing_data *pacing_data;
861
u32 read_val, fifo_occup;
862
struct bnxt_qplib_nq *nq;
863
864
if (!mutex_trylock(&rdev->dbq_lock))
865
return;
866
867
pacing_data = rdev->qplib_res.pacing_data;
868
read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_db_fifo_reg_off, 0);
869
fifo_occup = pacing_data->fifo_max_depth -
870
((read_val & pacing_data->fifo_room_mask) >>
871
pacing_data->fifo_room_shift);
872
873
if (fifo_occup > pacing_data->pacing_th)
874
goto restart_timer;
875
876
/*
877
* Instead of immediately going back to the default do_pacing
878
* reduce it by 1/8 times and restart the timer.
879
*/
880
pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
881
pacing_data->do_pacing = max_t(u32, rdev->dbr_def_do_pacing, pacing_data->do_pacing);
882
/*
883
* If the fifo_occup is less than the interrupt enable threshold
884
* enable the interrupt on the primary PF.
885
*/
886
if (rdev->dbq_int_disable && fifo_occup < rdev->pacing_en_int_th) {
887
if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
888
if (!rdev->chip_ctx->modes.dbr_pacing_v0) {
889
nq = &rdev->nqr.nq[0];
890
bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id,
891
rdev->dbq_watermark);
892
rdev->dbr_sw_stats->dbq_int_en++;
893
rdev->dbq_int_disable = false;
894
}
895
}
896
}
897
if (pacing_data->do_pacing <= rdev->dbr_def_do_pacing) {
898
bnxt_re_set_default_pacing_data(rdev);
899
rdev->dbr_sw_stats->dbq_pacing_complete++;
900
goto dbq_unlock;
901
}
902
restart_timer:
903
schedule_delayed_work(&rdev->dbq_pacing_work,
904
msecs_to_jiffies(rdev->dbq_pacing_time));
905
bnxt_re_update_do_pacing_slabs(rdev);
906
rdev->dbr_sw_stats->dbq_pacing_resched++;
907
dbq_unlock:
908
rdev->do_pacing_save = pacing_data->do_pacing;
909
mutex_unlock(&rdev->dbq_lock);
910
}
911
912
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
913
{
914
struct bnxt_qplib_db_pacing_data *pacing_data;
915
916
if (!rdev->dbr_pacing)
917
return;
918
mutex_lock(&rdev->dbq_lock);
919
pacing_data = rdev->qplib_res.pacing_data;
920
921
/*
922
* Increase the alarm_th to max so that other user lib instances do not
923
* keep alerting the driver.
924
*/
925
pacing_data->alarm_th = pacing_data->fifo_max_depth;
926
pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
927
cancel_work_sync(&rdev->dbq_fifo_check_work);
928
schedule_work(&rdev->dbq_fifo_check_work);
929
mutex_unlock(&rdev->dbq_lock);
930
}
931
932
void bnxt_re_schedule_dbq_event(struct bnxt_qplib_res *res)
933
{
934
struct bnxt_re_dbq_work *dbq_work;
935
struct bnxt_re_dev *rdev;
936
937
rdev = container_of(res, struct bnxt_re_dev, qplib_res);
938
939
atomic_set(&rdev->dbq_intr_running, 1);
940
941
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
942
goto exit;
943
/* Run the loop to send dbq event to other functions
944
* for newer FW
945
*/
946
if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
947
!rdev->chip_ctx->modes.dbr_pacing_v0)
948
bnxt_re_set_dbq_throttling_for_non_primary(rdev);
949
950
dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC);
951
if (!dbq_work)
952
goto exit;
953
dbq_work->rdev = rdev;
954
dbq_work->event = BNXT_RE_DBQ_EVENT_SCHED;
955
INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task);
956
queue_work(rdev->dbq_wq, &dbq_work->work);
957
rdev->dbr_sw_stats->dbq_int_recv++;
958
rdev->dbq_int_disable = true;
959
exit:
960
atomic_set(&rdev->dbq_intr_running, 0);
961
}
962
963
static void bnxt_re_free_msix(struct bnxt_re_dev *rdev)
964
{
965
struct bnxt_en_dev *en_dev = rdev->en_dev;
966
int rc;
967
968
rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
969
if (rc)
970
dev_err(rdev_to_dev(rdev), "netdev %p free_msix failed! rc = 0x%x",
971
rdev->netdev, rc);
972
}
973
974
static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
975
{
976
struct bnxt_en_dev *en_dev = rdev->en_dev;
977
int rc = 0, num_msix_want, num_msix_got;
978
struct bnxt_msix_entry *entry;
979
980
/*
981
* Request MSIx based on the function type. This is
982
* a temporory solution to enable max VFs when NPAR is
983
* enabled.
984
* TODO - change the scheme with an adapter specific check
985
* as the latest adapters can support more NQs. For now
986
* this change satisfy all adapter versions.
987
*/
988
989
if (rdev->is_virtfn)
990
num_msix_want = BNXT_RE_MAX_MSIX_VF;
991
else if (BNXT_EN_NPAR(en_dev))
992
num_msix_want = BNXT_RE_MAX_MSIX_NPAR_PF;
993
else if (_is_chip_gen_p5_p7(rdev->chip_ctx))
994
num_msix_want = rdev->num_msix_requested ?: BNXT_RE_MAX_MSIX_GEN_P5_PF;
995
else
996
num_msix_want = BNXT_RE_MAX_MSIX_PF;
997
998
/*
999
* Since MSIX vectors are used for both NQs and CREQ, we should try to
1000
* allocate num_online_cpus + 1 by taking into account the CREQ. This
1001
* leaves the number of MSIX vectors for NQs match the number of CPUs
1002
* and allows the system to be fully utilized
1003
*/
1004
num_msix_want = min_t(u32, num_msix_want, num_online_cpus() + 1);
1005
num_msix_want = min_t(u32, num_msix_want, BNXT_RE_MAX_MSIX);
1006
num_msix_want = max_t(u32, num_msix_want, BNXT_RE_MIN_MSIX);
1007
1008
entry = rdev->nqr.msix_entries;
1009
1010
num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
1011
entry, num_msix_want);
1012
if (num_msix_got < BNXT_RE_MIN_MSIX) {
1013
rc = -EINVAL;
1014
goto done;
1015
}
1016
if (num_msix_got != num_msix_want)
1017
dev_warn(rdev_to_dev(rdev),
1018
"bnxt_request_msix: wanted %d vectors, got %d\n",
1019
num_msix_want, num_msix_got);
1020
1021
rdev->nqr.num_msix = num_msix_got;
1022
return 0;
1023
done:
1024
if (num_msix_got)
1025
bnxt_re_free_msix(rdev);
1026
return rc;
1027
}
1028
1029
static int __wait_for_ib_unregister(struct bnxt_re_dev *rdev,
1030
struct bnxt_re_en_dev_info *en_info)
1031
{
1032
u64 timeout = 0;
1033
u32 cur_prod = 0, cur_cons = 0;
1034
int retry = 0, rc = 0, ret = 0;
1035
1036
cur_prod = rdev->rcfw.cmdq.hwq.prod;
1037
cur_cons = rdev->rcfw.cmdq.hwq.cons;
1038
timeout = msecs_to_jiffies(BNXT_RE_RECOVERY_IB_UNINIT_WAIT_TIME_MS);
1039
retry = BNXT_RE_RECOVERY_IB_UNINIT_WAIT_RETRY;
1040
/* During module exit, increase timeout ten-fold to 100 mins to wait
1041
* as long as possible for ib_unregister() to complete
1042
*/
1043
if (rdev->mod_exit)
1044
retry *= 10;
1045
do {
1046
/*
1047
* Since the caller of this function invokes with bnxt_re_mutex held,
1048
* release it to avoid holding a lock while in wait / sleep mode.
1049
*/
1050
mutex_unlock(&bnxt_re_mutex);
1051
rc = wait_event_timeout(en_info->waitq,
1052
en_info->ib_uninit_done,
1053
timeout);
1054
mutex_lock(&bnxt_re_mutex);
1055
1056
if (!bnxt_re_is_rdev_valid(rdev))
1057
break;
1058
1059
if (rc)
1060
break;
1061
1062
if (!RCFW_NO_FW_ACCESS(&rdev->rcfw)) {
1063
/* No need to check for cmdq stall during module exit,
1064
* wait for ib unregister to complete
1065
*/
1066
if (!rdev->mod_exit)
1067
ret = __check_cmdq_stall(&rdev->rcfw, &cur_prod, &cur_cons);
1068
if (ret || en_info->ib_uninit_done)
1069
break;
1070
}
1071
} while (retry--);
1072
1073
return rc;
1074
}
1075
1076
static int bnxt_re_handle_start(struct auxiliary_device *adev)
1077
{
1078
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
1079
struct bnxt_re_dev *rdev = NULL;
1080
struct ifnet *real_dev;
1081
struct bnxt_en_dev *en_dev;
1082
struct ifnet *netdev;
1083
int rc = 0;
1084
1085
if (!en_info || !en_info->en_dev) {
1086
pr_err("Start, bad en_info or en_dev\n");
1087
return -EINVAL;
1088
}
1089
netdev = en_info->en_dev->net;
1090
if (en_info->rdev) {
1091
dev_info(rdev_to_dev(en_info->rdev),
1092
"%s: Device is already added adev %p rdev: %p\n",
1093
__func__, adev, en_info->rdev);
1094
return 0;
1095
}
1096
1097
en_dev = en_info->en_dev;
1098
real_dev = rdma_vlan_dev_real_dev(netdev);
1099
if (!real_dev)
1100
real_dev = netdev;
1101
rc = bnxt_re_add_device(&rdev, real_dev,
1102
en_info->gsi_mode,
1103
BNXT_RE_POST_RECOVERY_INIT,
1104
en_info->wqe_mode,
1105
en_info->num_msix_requested, adev);
1106
if (rc) {
1107
/* Add device failed. Unregister the device.
1108
* This has to be done explicitly as
1109
* bnxt_re_stop would not have unregistered
1110
*/
1111
rtnl_lock();
1112
en_dev->en_ops->bnxt_unregister_device(en_dev, BNXT_ROCE_ULP);
1113
rtnl_unlock();
1114
mutex_lock(&bnxt_re_dev_lock);
1115
gadd_dev_inprogress--;
1116
mutex_unlock(&bnxt_re_dev_lock);
1117
return rc;
1118
}
1119
rdev->adev = adev;
1120
rtnl_lock();
1121
bnxt_re_get_link_speed(rdev);
1122
rtnl_unlock();
1123
rc = bnxt_re_ib_init(rdev);
1124
if (rc) {
1125
dev_err(rdev_to_dev(rdev), "Failed ib_init\n");
1126
return rc;
1127
}
1128
bnxt_re_ib_init_2(rdev);
1129
1130
return rc;
1131
}
1132
1133
static void bnxt_re_stop(void *handle)
1134
{
1135
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1136
struct ifnet *netdev;
1137
struct bnxt_re_dev *rdev;
1138
struct bnxt_en_dev *en_dev;
1139
int rc = 0;
1140
1141
rtnl_unlock();
1142
mutex_lock(&bnxt_re_mutex);
1143
if (!en_info || !en_info->en_dev) {
1144
pr_err("Stop, bad en_info or en_dev\n");
1145
goto exit;
1146
}
1147
netdev = en_info->en_dev->net;
1148
rdev = en_info->rdev;
1149
if (!rdev)
1150
goto exit;
1151
1152
if (!bnxt_re_is_rdev_valid(rdev))
1153
goto exit;
1154
1155
/*
1156
* Check if fw has undergone reset or is in a fatal condition.
1157
* If so, set flags so that no further commands are sent down to FW
1158
*/
1159
en_dev = rdev->en_dev;
1160
if (en_dev->en_state & BNXT_STATE_FW_FATAL_COND ||
1161
en_dev->en_state & BNXT_STATE_FW_RESET_DET) {
1162
/* Set rcfw flag to control commands send to Bono */
1163
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
1164
/* Set bnxt_re flag to control commands send via L2 driver */
1165
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
1166
wake_up_all(&rdev->rcfw.cmdq.waitq);
1167
}
1168
1169
if (test_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags))
1170
goto exit;
1171
set_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags);
1172
1173
en_info->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
1174
en_info->gsi_mode = rdev->gsi_ctx.gsi_qp_mode;
1175
en_info->num_msix_requested = rdev->num_msix_requested;
1176
en_info->ib_uninit_done = false;
1177
1178
if (rdev->dbr_pacing)
1179
bnxt_re_set_pacing_dev_state(rdev);
1180
1181
dev_info(rdev_to_dev(rdev), "%s: L2 driver notified to stop."
1182
"Attempting to stop and Dispatching event "
1183
"to inform the stack\n", __func__);
1184
init_waitqueue_head(&en_info->waitq);
1185
/* Schedule a work item to handle IB UNINIT for recovery */
1186
bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER,
1187
NULL, netdev, rdev->adev);
1188
rc = __wait_for_ib_unregister(rdev, en_info);
1189
if (!bnxt_re_is_rdev_valid(rdev))
1190
goto exit;
1191
if (!rc) {
1192
dev_info(rdev_to_dev(rdev), "%s: Attempt to stop failed\n",
1193
__func__);
1194
bnxt_re_detach_err_device(rdev);
1195
goto exit;
1196
}
1197
bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, rdev->adev);
1198
exit:
1199
mutex_unlock(&bnxt_re_mutex);
1200
/* Take rtnl_lock before return, bnxt_re_stop is called with rtnl_lock */
1201
rtnl_lock();
1202
1203
return;
1204
}
1205
1206
static void bnxt_re_start(void *handle)
1207
{
1208
rtnl_unlock();
1209
mutex_lock(&bnxt_re_mutex);
1210
if (bnxt_re_handle_start((struct auxiliary_device *)handle))
1211
pr_err("Failed to start RoCE device");
1212
mutex_unlock(&bnxt_re_mutex);
1213
/* Take rtnl_lock before return, bnxt_re_start is called with rtnl_lock */
1214
rtnl_lock();
1215
return;
1216
}
1217
1218
static void bnxt_re_shutdown(void *p)
1219
{
1220
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(p);
1221
struct bnxt_re_dev *rdev;
1222
1223
if (!en_info) {
1224
pr_err("Shutdown, bad en_info\n");
1225
return;
1226
}
1227
rtnl_unlock();
1228
mutex_lock(&bnxt_re_mutex);
1229
rdev = en_info->rdev;
1230
if (!rdev || !bnxt_re_is_rdev_valid(rdev))
1231
goto exit;
1232
1233
/* rtnl_lock held by L2 before coming here */
1234
bnxt_re_stopqps_and_ib_uninit(rdev);
1235
bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, rdev->adev);
1236
exit:
1237
mutex_unlock(&bnxt_re_mutex);
1238
rtnl_lock();
1239
return;
1240
}
1241
1242
static void bnxt_re_stop_irq(void *handle)
1243
{
1244
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1245
struct bnxt_qplib_rcfw *rcfw = NULL;
1246
struct bnxt_re_dev *rdev;
1247
struct bnxt_qplib_nq *nq;
1248
int indx;
1249
1250
if (!en_info) {
1251
pr_err("Stop irq, bad en_info\n");
1252
return;
1253
}
1254
rdev = en_info->rdev;
1255
1256
if (!rdev)
1257
return;
1258
1259
rcfw = &rdev->rcfw;
1260
for (indx = 0; indx < rdev->nqr.max_init; indx++) {
1261
nq = &rdev->nqr.nq[indx];
1262
mutex_lock(&nq->lock);
1263
bnxt_qplib_nq_stop_irq(nq, false);
1264
mutex_unlock(&nq->lock);
1265
}
1266
1267
if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags))
1268
bnxt_qplib_rcfw_stop_irq(rcfw, false);
1269
}
1270
1271
static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
1272
{
1273
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
1274
struct bnxt_msix_entry *msix_ent = NULL;
1275
struct bnxt_qplib_rcfw *rcfw = NULL;
1276
struct bnxt_re_dev *rdev;
1277
struct bnxt_qplib_nq *nq;
1278
int indx, rc, vec;
1279
1280
if (!en_info) {
1281
pr_err("Start irq, bad en_info\n");
1282
return;
1283
}
1284
rdev = en_info->rdev;
1285
if (!rdev)
1286
return;
1287
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1288
return;
1289
msix_ent = rdev->nqr.msix_entries;
1290
rcfw = &rdev->rcfw;
1291
1292
if (!ent) {
1293
/* Not setting the f/w timeout bit in rcfw.
1294
* During the driver unload the first command
1295
* to f/w will timeout and that will set the
1296
* timeout bit.
1297
*/
1298
dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n");
1299
return;
1300
}
1301
1302
/* Vectors may change after restart, so update with new vectors
1303
* in device structure.
1304
*/
1305
for (indx = 0; indx < rdev->nqr.num_msix; indx++)
1306
rdev->nqr.msix_entries[indx].vector = ent[indx].vector;
1307
1308
if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) {
1309
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
1310
false);
1311
if (rc) {
1312
dev_warn(rdev_to_dev(rdev),
1313
"Failed to reinit CREQ\n");
1314
return;
1315
}
1316
}
1317
for (indx = 0 ; indx < rdev->nqr.max_init; indx++) {
1318
nq = &rdev->nqr.nq[indx];
1319
vec = indx + 1;
1320
rc = bnxt_qplib_nq_start_irq(nq, indx, msix_ent[vec].vector,
1321
false);
1322
if (rc) {
1323
dev_warn(rdev_to_dev(rdev),
1324
"Failed to reinit NQ index %d\n", indx);
1325
return;
1326
}
1327
}
1328
}
1329
1330
/*
1331
* Except for ulp_async_notifier, the remaining ulp_ops
1332
* below are called with rtnl_lock held
1333
*/
1334
static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
1335
.ulp_async_notifier = bnxt_re_async_notifier,
1336
.ulp_stop = bnxt_re_stop,
1337
.ulp_start = bnxt_re_start,
1338
.ulp_shutdown = bnxt_re_shutdown,
1339
.ulp_irq_stop = bnxt_re_stop_irq,
1340
.ulp_irq_restart = bnxt_re_start_irq,
1341
};
1342
1343
static inline const char *bnxt_re_netevent(unsigned long event)
1344
{
1345
BNXT_RE_NETDEV_EVENT(event, NETDEV_UP);
1346
BNXT_RE_NETDEV_EVENT(event, NETDEV_DOWN);
1347
BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGE);
1348
BNXT_RE_NETDEV_EVENT(event, NETDEV_REGISTER);
1349
BNXT_RE_NETDEV_EVENT(event, NETDEV_UNREGISTER);
1350
BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGEADDR);
1351
return "Unknown";
1352
}
1353
1354
/* RoCE -> Net driver */
1355
1356
/* Driver registration routines used to let the networking driver (bnxt_en)
1357
* to know that the RoCE driver is now installed */
1358
static void bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
1359
{
1360
struct bnxt_en_dev *en_dev = rdev->en_dev;
1361
int rc;
1362
1363
rtnl_lock();
1364
rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
1365
BNXT_ROCE_ULP);
1366
rtnl_unlock();
1367
if (rc)
1368
dev_err(rdev_to_dev(rdev), "netdev %p unregister failed! rc = 0x%x",
1369
rdev->en_dev->net, rc);
1370
1371
clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1372
}
1373
1374
static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
1375
{
1376
struct bnxt_en_dev *en_dev = rdev->en_dev;
1377
int rc = 0;
1378
1379
rtnl_lock();
1380
rc = en_dev->en_ops->bnxt_register_device(en_dev,
1381
BNXT_ROCE_ULP,
1382
&bnxt_re_ulp_ops,
1383
rdev->adev);
1384
rtnl_unlock();
1385
if (rc) {
1386
dev_err(rdev_to_dev(rdev), "netdev %p register failed! rc = 0x%x",
1387
rdev->netdev, rc);
1388
return rc;
1389
}
1390
1391
return rc;
1392
}
1393
1394
static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
1395
{
1396
struct bnxt_qplib_chip_ctx *cctx;
1397
struct bnxt_en_dev *en_dev;
1398
struct bnxt_qplib_res *res;
1399
u32 l2db_len = 0;
1400
u32 offset = 0;
1401
u32 barlen;
1402
int rc;
1403
1404
res = &rdev->qplib_res;
1405
en_dev = rdev->en_dev;
1406
cctx = rdev->chip_ctx;
1407
1408
/* Issue qcfg */
1409
rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
1410
if (rc)
1411
dev_info(rdev_to_dev(rdev),
1412
"Couldn't get DB bar size, Low latency framework is disabled\n");
1413
/* set register offsets for both UC and WC */
1414
if (_is_chip_p7(cctx))
1415
res->dpi_tbl.ucreg.offset = offset;
1416
else
1417
res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
1418
BNXT_QPLIB_DBR_PF_DB_OFFSET;
1419
res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
1420
1421
/* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
1422
* is equal to the DB-Bar actual size. This indicates that L2
1423
* is mapping entire bar as UC-. RoCE driver can't enable WC mapping
1424
* in such cases and DB-push will be disabled.
1425
*/
1426
barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
1427
if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
1428
res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
1429
dev_info(rdev_to_dev(rdev),
1430
"Low latency framework is enabled\n");
1431
}
1432
1433
return;
1434
}
1435
1436
static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
1437
{
1438
struct bnxt_qplib_chip_ctx *cctx;
1439
struct bnxt_en_dev *en_dev;
1440
1441
en_dev = rdev->en_dev;
1442
cctx = rdev->chip_ctx;
1443
cctx->modes.wqe_mode = _is_chip_gen_p5_p7(rdev->chip_ctx) ?
1444
mode : BNXT_QPLIB_WQE_MODE_STATIC;
1445
cctx->modes.te_bypass = false;
1446
if (bnxt_re_hwrm_qcaps(rdev))
1447
dev_err(rdev_to_dev(rdev),
1448
"Failed to query hwrm qcaps\n");
1449
/*
1450
* TODO: Need a better mechanism for spreading of the
1451
* 512 extended PPP pages in the presence of VF and
1452
* NPAR, until then not enabling push
1453
*/
1454
if (_is_chip_p7(rdev->chip_ctx) && cctx->modes.db_push) {
1455
if (rdev->is_virtfn || BNXT_EN_NPAR(en_dev))
1456
cctx->modes.db_push = false;
1457
}
1458
1459
rdev->roce_mode = en_dev->flags & BNXT_EN_FLAG_ROCE_CAP;
1460
dev_dbg(rdev_to_dev(rdev),
1461
"RoCE is supported on the device - caps:0x%x",
1462
rdev->roce_mode);
1463
if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
1464
rdev->roce_mode = BNXT_RE_FLAG_ROCEV2_CAP;
1465
cctx->hw_stats_size = en_dev->hw_ring_stats_size;
1466
}
1467
1468
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
1469
{
1470
struct bnxt_qplib_chip_ctx *chip_ctx;
1471
struct bnxt_qplib_res *res;
1472
1473
if (!rdev->chip_ctx)
1474
return;
1475
1476
res = &rdev->qplib_res;
1477
bnxt_qplib_unmap_db_bar(res);
1478
1479
kfree(res->hctx);
1480
res->rcfw = NULL;
1481
kfree(rdev->dev_attr);
1482
rdev->dev_attr = NULL;
1483
1484
chip_ctx = rdev->chip_ctx;
1485
rdev->chip_ctx = NULL;
1486
res->cctx = NULL;
1487
res->hctx = NULL;
1488
res->pdev = NULL;
1489
res->netdev = NULL;
1490
kfree(chip_ctx);
1491
}
1492
1493
static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
1494
{
1495
struct bnxt_qplib_chip_ctx *chip_ctx;
1496
struct bnxt_en_dev *en_dev;
1497
int rc;
1498
1499
en_dev = rdev->en_dev;
1500
/* Supply pci device to qplib */
1501
rdev->qplib_res.pdev = en_dev->pdev;
1502
rdev->qplib_res.netdev = rdev->netdev;
1503
rdev->qplib_res.en_dev = en_dev;
1504
1505
chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
1506
if (!chip_ctx)
1507
return -ENOMEM;
1508
rdev->chip_ctx = chip_ctx;
1509
rdev->qplib_res.cctx = chip_ctx;
1510
rc = bnxt_re_query_hwrm_intf_version(rdev);
1511
if (rc)
1512
goto fail;
1513
rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
1514
if (!rdev->dev_attr) {
1515
rc = -ENOMEM;
1516
goto fail;
1517
}
1518
rdev->qplib_res.dattr = rdev->dev_attr;
1519
rdev->qplib_res.rcfw = &rdev->rcfw;
1520
rdev->qplib_res.is_vf = rdev->is_virtfn;
1521
1522
rdev->qplib_res.hctx = kzalloc(sizeof(*rdev->qplib_res.hctx),
1523
GFP_KERNEL);
1524
if (!rdev->qplib_res.hctx) {
1525
rc = -ENOMEM;
1526
goto fail;
1527
}
1528
bnxt_re_set_drv_mode(rdev, wqe_mode);
1529
1530
bnxt_re_set_db_offset(rdev);
1531
rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
1532
if (rc)
1533
goto fail;
1534
1535
rc = bnxt_qplib_enable_atomic_ops_to_root(en_dev->pdev);
1536
if (rc)
1537
dev_dbg(rdev_to_dev(rdev),
1538
"platform doesn't support global atomics");
1539
1540
return 0;
1541
fail:
1542
kfree(rdev->chip_ctx);
1543
rdev->chip_ctx = NULL;
1544
1545
kfree(rdev->dev_attr);
1546
rdev->dev_attr = NULL;
1547
1548
kfree(rdev->qplib_res.hctx);
1549
rdev->qplib_res.hctx = NULL;
1550
return rc;
1551
}
1552
1553
static u16 bnxt_re_get_rtype(struct bnxt_re_dev *rdev) {
1554
return _is_chip_gen_p5_p7(rdev->chip_ctx) ?
1555
HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ :
1556
HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL;
1557
}
1558
1559
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
1560
{
1561
int rc = -EINVAL;
1562
struct hwrm_ring_free_input req = {0};
1563
struct hwrm_ring_free_output resp;
1564
struct bnxt_en_dev *en_dev = rdev->en_dev;
1565
struct bnxt_fw_msg fw_msg;
1566
1567
if (!en_dev)
1568
return rc;
1569
1570
/* To avoid unnecessary error messages during recovery.
1571
* HW is anyway in error state. So dont send down the command */
1572
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1573
return 0;
1574
1575
/* allocation had failed, no need to issue hwrm */
1576
if (fw_ring_id == 0xffff)
1577
return 0;
1578
1579
memset(&fw_msg, 0, sizeof(fw_msg));
1580
1581
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
1582
req.ring_type = bnxt_re_get_rtype(rdev);
1583
req.ring_id = cpu_to_le16(fw_ring_id);
1584
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1585
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1586
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1587
if (rc) {
1588
dev_err(rdev_to_dev(rdev),
1589
"Failed to free HW ring with rc = 0x%x", rc);
1590
return rc;
1591
}
1592
dev_dbg(rdev_to_dev(rdev), "HW ring freed with id = 0x%x\n",
1593
fw_ring_id);
1594
1595
return rc;
1596
}
1597
1598
static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
1599
struct bnxt_re_ring_attr *ring_attr,
1600
u16 *fw_ring_id)
1601
{
1602
int rc = -EINVAL;
1603
struct hwrm_ring_alloc_input req = {0};
1604
struct hwrm_ring_alloc_output resp;
1605
struct bnxt_en_dev *en_dev = rdev->en_dev;
1606
struct bnxt_fw_msg fw_msg;
1607
1608
if (!en_dev)
1609
return rc;
1610
1611
memset(&fw_msg, 0, sizeof(fw_msg));
1612
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
1613
req.flags = cpu_to_le16(ring_attr->flags);
1614
req.enables = 0;
1615
req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
1616
if (ring_attr->pages > 1) {
1617
/* Page size is in log2 units */
1618
req.page_size = BNXT_PAGE_SHIFT;
1619
req.page_tbl_depth = 1;
1620
} else {
1621
req.page_size = 4;
1622
req.page_tbl_depth = 0;
1623
}
1624
1625
req.fbo = 0;
1626
/* Association of ring index with doorbell index and MSIX number */
1627
req.logical_id = cpu_to_le16(ring_attr->lrid);
1628
req.length = cpu_to_le32(ring_attr->depth + 1);
1629
req.ring_type = ring_attr->type;
1630
req.int_mode = ring_attr->mode;
1631
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1632
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1633
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1634
if (rc) {
1635
dev_err(rdev_to_dev(rdev),
1636
"Failed to allocate HW ring with rc = 0x%x", rc);
1637
return rc;
1638
}
1639
*fw_ring_id = le16_to_cpu(resp.ring_id);
1640
dev_dbg(rdev_to_dev(rdev),
1641
"HW ring allocated with id = 0x%x at slot 0x%x",
1642
resp.ring_id, ring_attr->lrid);
1643
1644
return rc;
1645
}
1646
1647
static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
1648
u32 fw_stats_ctx_id, u16 tid)
1649
{
1650
struct bnxt_en_dev *en_dev = rdev->en_dev;
1651
struct hwrm_stat_ctx_free_input req = {0};
1652
struct hwrm_stat_ctx_free_output resp;
1653
struct bnxt_fw_msg fw_msg;
1654
int rc = -EINVAL;
1655
1656
if (!en_dev)
1657
return rc;
1658
1659
/* To avoid unnecessary error messages during recovery.
1660
* HW is anyway in error state. So dont send down the command */
1661
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1662
return 0;
1663
memset(&fw_msg, 0, sizeof(fw_msg));
1664
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, tid);
1665
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
1666
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1667
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1668
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1669
if (rc) {
1670
dev_err(rdev_to_dev(rdev),
1671
"Failed to free HW stats ctx with rc = 0x%x", rc);
1672
return rc;
1673
}
1674
dev_dbg(rdev_to_dev(rdev),
1675
"HW stats ctx freed with id = 0x%x", fw_stats_ctx_id);
1676
1677
return rc;
1678
}
1679
1680
static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, u16 tid)
1681
{
1682
struct hwrm_stat_ctx_alloc_output resp = {};
1683
struct hwrm_stat_ctx_alloc_input req = {};
1684
struct bnxt_en_dev *en_dev = rdev->en_dev;
1685
struct bnxt_qplib_stats *stat;
1686
struct bnxt_qplib_ctx *hctx;
1687
struct bnxt_fw_msg fw_msg;
1688
int rc = 0;
1689
1690
hctx = rdev->qplib_res.hctx;
1691
stat = (tid == 0xffff) ? &hctx->stats : &hctx->stats2;
1692
stat->fw_id = INVALID_STATS_CTX_ID;
1693
1694
if (!en_dev)
1695
return -EINVAL;
1696
1697
memset(&fw_msg, 0, sizeof(fw_msg));
1698
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1699
HWRM_STAT_CTX_ALLOC, -1, tid);
1700
req.update_period_ms = cpu_to_le32(1000);
1701
req.stats_dma_length = rdev->chip_ctx->hw_stats_size;
1702
req.stats_dma_addr = cpu_to_le64(stat->dma_map);
1703
req.stat_ctx_flags = HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE;
1704
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1705
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1706
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1707
if (rc) {
1708
dev_err(rdev_to_dev(rdev),
1709
"Failed to allocate HW stats ctx, rc = 0x%x", rc);
1710
return rc;
1711
}
1712
stat->fw_id = le32_to_cpu(resp.stat_ctx_id);
1713
dev_dbg(rdev_to_dev(rdev), "HW stats ctx allocated with id = 0x%x",
1714
stat->fw_id);
1715
1716
return rc;
1717
}
1718
1719
static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
1720
{
1721
const struct bnxt_en_ops *en_ops;
1722
1723
if (rdev->is_virtfn ||
1724
test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1725
return;
1726
1727
memset(rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
1728
en_ops = rdev->en_dev->en_ops;
1729
if (en_ops->bnxt_register_fw_async_events
1730
(rdev->en_dev, BNXT_ROCE_ULP,
1731
(unsigned long *)rdev->event_bitmap,
1732
HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE))
1733
dev_err(rdev_to_dev(rdev),
1734
"Failed to unregister async event");
1735
}
1736
1737
static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
1738
{
1739
const struct bnxt_en_ops *en_ops;
1740
1741
if (rdev->is_virtfn)
1742
return;
1743
1744
rdev->event_bitmap[0] |=
1745
BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE) |
1746
BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY);
1747
1748
rdev->event_bitmap[2] |=
1749
BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT - 64);
1750
rdev->event_bitmap[2] |=
1751
BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD - 64) |
1752
BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE - 64);
1753
en_ops = rdev->en_dev->en_ops;
1754
if (en_ops->bnxt_register_fw_async_events
1755
(rdev->en_dev, BNXT_ROCE_ULP,
1756
(unsigned long *)rdev->event_bitmap,
1757
HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE))
1758
dev_err(rdev_to_dev(rdev),
1759
"Failed to reg Async event");
1760
}
1761
1762
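/*
 * Issue HWRM_VER_GET and cache the HWRM interface version, the maximum
 * command timeout and the chip number/rev/metal in the chip context
 * for later feature checks.
 */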
static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
1763
{
1764
struct bnxt_en_dev *en_dev = rdev->en_dev;
1765
struct hwrm_ver_get_output resp = {0};
1766
struct hwrm_ver_get_input req = {0};
1767
struct bnxt_qplib_chip_ctx *cctx;
1768
struct bnxt_fw_msg fw_msg;
1769
int rc = 0;
1770
1771
memset(&fw_msg, 0, sizeof(fw_msg));
1772
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1773
HWRM_VER_GET, -1, -1);
1774
req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
1775
req.hwrm_intf_min = HWRM_VERSION_MINOR;
1776
req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
1777
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1778
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1779
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1780
if (rc) {
1781
dev_err(rdev_to_dev(rdev),
1782
"Failed to query HW version, rc = 0x%x", rc);
1783
return rc;
1784
}
1785
cctx = rdev->chip_ctx;
1786
cctx->hwrm_intf_ver = (u64) le16_to_cpu(resp.hwrm_intf_major) << 48 |
1787
(u64) le16_to_cpu(resp.hwrm_intf_minor) << 32 |
1788
(u64) le16_to_cpu(resp.hwrm_intf_build) << 16 |
1789
le16_to_cpu(resp.hwrm_intf_patch);
1790
1791
cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
1792
1793
if (!cctx->hwrm_cmd_max_timeout)
1794
cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
1795
1796
cctx->chip_num = le16_to_cpu(resp.chip_num);
1797
cctx->chip_rev = resp.chip_rev;
1798
cctx->chip_metal = resp.chip_metal;
1799
return 0;
1800
}
1801
1802
/* Query device config using common hwrm */
1803
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
1804
u32 *offset)
1805
{
1806
struct bnxt_en_dev *en_dev = rdev->en_dev;
1807
struct hwrm_func_qcfg_output resp = {0};
1808
struct hwrm_func_qcfg_input req = {0};
1809
struct bnxt_fw_msg fw_msg;
1810
int rc;
1811
1812
memset(&fw_msg, 0, sizeof(fw_msg));
1813
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1814
HWRM_FUNC_QCFG, -1, -1);
1815
req.fid = cpu_to_le16(0xffff);
1816
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1817
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1818
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1819
if (rc) {
1820
dev_err(rdev_to_dev(rdev),
1821
"Failed to query config, rc = %#x", rc);
1822
return rc;
1823
}
1824
1825
*db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
1826
*offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
1827
return 0;
1828
}
1829
1830
/* Query function capabilities using common hwrm */
1831
int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
1832
{
1833
struct bnxt_en_dev *en_dev = rdev->en_dev;
1834
struct hwrm_func_qcaps_output resp = {0};
1835
struct hwrm_func_qcaps_input req = {0};
1836
struct bnxt_qplib_chip_ctx *cctx;
1837
struct bnxt_fw_msg fw_msg;
1838
u8 push_enable = false;
1839
int rc;
1840
1841
cctx = rdev->chip_ctx;
1842
memset(&fw_msg, 0, sizeof(fw_msg));
1843
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1844
HWRM_FUNC_QCAPS, -1, -1);
1845
req.fid = cpu_to_le16(0xffff);
1846
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1847
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
1848
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1849
if (rc) {
1850
dev_err(rdev_to_dev(rdev),
1851
"Failed to query capabilities, rc = %#x", rc);
1852
return rc;
1853
}
1854
if (_is_chip_p7(rdev->chip_ctx))
1855
push_enable =
1856
(resp.flags_ext &
1857
HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED) ?
1858
true : false;
1859
else
1860
push_enable =
1861
(resp.flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_WCB_PUSH_MODE) ?
1862
true : false;
1863
cctx->modes.db_push = push_enable;
1864
1865
cctx->modes.dbr_pacing =
1866
resp.flags_ext & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_DBR_PACING_SUPPORTED ?
1867
true : false;
1868
cctx->modes.dbr_pacing_ext =
1869
resp.flags_ext2 &
1870
HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ?
1871
true : false;
1872
cctx->modes.dbr_drop_recov =
1873
(resp.flags_ext2 &
1874
HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_SW_DBR_DROP_RECOVERY_SUPPORTED) ?
1875
true : false;
1876
cctx->modes.dbr_pacing_v0 =
1877
(resp.flags_ext2 &
1878
HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED) ?
1879
true : false;
1880
dev_dbg(rdev_to_dev(rdev),
1881
"%s: cctx->modes.dbr_pacing = %d cctx->modes.dbr_pacing_ext = %d, dbr_drop_recov %d\n",
1882
__func__, cctx->modes.dbr_pacing, cctx->modes.dbr_pacing_ext, cctx->modes.dbr_drop_recov);
1883
1884
return 0;
1885
}
1886
1887
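/*
 * Query the doorbell (DBR) pacing configuration from firmware and fill
 * in the pacing data page: primary NQ ownership, GRC offsets of the DB
 * FIFO and AEQ arm registers, FIFO max depth and the room mask/shift.
 */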
static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
1888
{
1889
struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
1890
struct hwrm_func_dbr_pacing_qcfg_output resp = {0};
1891
struct hwrm_func_dbr_pacing_qcfg_input req = {0};
1892
struct bnxt_en_dev *en_dev = rdev->en_dev;
1893
struct bnxt_qplib_chip_ctx *cctx;
1894
struct bnxt_fw_msg fw_msg;
1895
u32 primary_nq_id;
1896
int rc;
1897
1898
cctx = rdev->chip_ctx;
1899
memset(&fw_msg, 0, sizeof(fw_msg));
1900
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1901
HWRM_FUNC_DBR_PACING_QCFG, -1, -1);
1902
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1903
sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
1904
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1905
if (rc) {
1906
dev_dbg(rdev_to_dev(rdev),
1907
"Failed to query dbr pacing config, rc = %#x", rc);
1908
return rc;
1909
}
1910
1911
primary_nq_id = le32_to_cpu(resp.primary_nq_id);
1912
if (primary_nq_id == 0xffffffff &&
1913
!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
1914
dev_err(rdev_to_dev(rdev), "%s:%d Invoke bnxt_qplib_dbr_pacing_set_primary_pf with 1\n",
1915
__func__, __LINE__);
1916
bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1);
1917
}
1918
1919
if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
1920
struct bnxt_qplib_nq *nq;
1921
1922
nq = &rdev->nqr.nq[0];
1923
/* Reset the primary capability */
1924
if (nq->ring_id != primary_nq_id)
1925
bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0);
1926
}
1927
1928
if ((resp.dbr_stat_db_fifo_reg &
1929
HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
1930
HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
1931
cctx->dbr_stat_db_fifo =
1932
resp.dbr_stat_db_fifo_reg &
1933
~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
1934
1935
if ((resp.dbr_throttling_aeq_arm_reg &
1936
HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK)
1937
== HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC) {
1938
cctx->dbr_aeq_arm_reg = resp.dbr_throttling_aeq_arm_reg &
1939
~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
1940
cctx->dbr_throttling_reg = cctx->dbr_aeq_arm_reg - 4;
1941
}
1942
pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
1943
if (!pacing_data->fifo_max_depth)
1944
pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
1945
pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
1946
pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
1947
dev_dbg(rdev_to_dev(rdev),
1948
"%s: nq:0x%x primary_pf:%d db_fifo:0x%x aeq_arm:0x%x i"
1949
"fifo_max_depth 0x%x , resp.dbr_stat_db_max_fifo_depth 0x%x);\n",
1950
__func__, resp.primary_nq_id, cctx->modes.dbr_primary_pf,
1951
cctx->dbr_stat_db_fifo, cctx->dbr_aeq_arm_reg,
1952
pacing_data->fifo_max_depth,
1953
le32_to_cpu(resp.dbr_stat_db_max_fifo_depth));
1954
return 0;
1955
}
1956
1957
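/*
 * Enable or disable DBR pacing NQ event notification in firmware,
 * passing the primary NQ id and the pacing threshold when enabling.
 */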
static int bnxt_re_hwrm_dbr_pacing_cfg(struct bnxt_re_dev *rdev, bool enable)
1958
{
1959
struct hwrm_func_dbr_pacing_cfg_output resp = {0};
1960
struct hwrm_func_dbr_pacing_cfg_input req = {0};
1961
struct bnxt_en_dev *en_dev = rdev->en_dev;
1962
struct bnxt_fw_msg fw_msg;
1963
int rc;
1964
1965
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
1966
return 0;
1967
1968
memset(&fw_msg, 0, sizeof(fw_msg));
1969
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
1970
HWRM_FUNC_DBR_PACING_CFG, -1, -1);
1971
if (enable) {
1972
req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_ENABLE;
1973
req.enables =
1974
cpu_to_le32(HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PRIMARY_NQ_ID_VALID |
1975
HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PACING_THRESHOLD_VALID);
1976
} else {
1977
req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_DISABLE;
1978
}
1979
req.primary_nq_id = cpu_to_le32(rdev->dbq_nq_id);
1980
req.pacing_threshold = cpu_to_le32(rdev->dbq_watermark);
1981
dev_dbg(rdev_to_dev(rdev), "%s: nq_id = 0x%x pacing_threshold = 0x%x",
1982
__func__, req.primary_nq_id, req.pacing_threshold);
1983
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
1984
sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
1985
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
1986
if (rc) {
1987
dev_dbg(rdev_to_dev(rdev),
1988
"Failed to set dbr pacing config, rc = %#x", rc);
1989
return rc;
1990
}
1991
return 0;
1992
}
1993
1994
/* Net -> RoCE driver */
1995
1996
/* Device */
1997
struct bnxt_re_dev *bnxt_re_from_netdev(struct ifnet *netdev)
1998
{
1999
struct bnxt_re_dev *rdev;
2000
2001
rcu_read_lock();
2002
list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
2003
if (rdev->netdev == netdev) {
2004
rcu_read_unlock();
2005
dev_dbg(rdev_to_dev(rdev),
2006
"netdev (%p) found, ref_count = 0x%x",
2007
netdev, atomic_read(&rdev->ref_count));
2008
return rdev;
2009
}
2010
}
2011
rcu_read_unlock();
2012
return NULL;
2013
}
2014
2015
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2016
char *buf)
2017
{
2018
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
2019
2020
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
2021
}
2022
2023
2024
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2025
char *buf)
2026
{
2027
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
2028
2029
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
2030
}
2031
2032
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
2033
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
2034
static struct device_attribute *bnxt_re_attributes[] = {
2035
&dev_attr_hw_rev,
2036
&dev_attr_hca_type
2037
};
2038
2039
int ib_register_device_compat(struct bnxt_re_dev *rdev)
2040
{
2041
struct ib_device *ibdev = &rdev->ibdev;
2042
char name[IB_DEVICE_NAME_MAX];
2043
2044
memset(name, 0, IB_DEVICE_NAME_MAX);
2045
strlcpy(name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
2046
2047
strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
2048
2049
return ib_register_device(ibdev, NULL);
2050
}
2051
2052
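/*
 * Populate the ib_device structure: node info, uverbs command masks
 * and all bnxt_re verbs callbacks, then register it with the IB core.
 */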
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
2053
{
2054
struct ib_device *ibdev = &rdev->ibdev;
2055
int ret = 0;
2056
2057
/* ib device init */
2058
ibdev->owner = THIS_MODULE;
2059
ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
2060
ibdev->node_type = RDMA_NODE_IB_CA;
2061
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
2062
strlen(BNXT_RE_DESC) + 5);
2063
ibdev->phys_port_cnt = 1;
2064
2065
bnxt_qplib_get_guid(rdev->dev_addr, (u8 *)&ibdev->node_guid);
2066
2067
/* The number of data path IRQs is one less than the max MSI-X vectors */
2068
ibdev->num_comp_vectors = rdev->nqr.num_msix - 1;
2069
bnxt_re_set_dma_device(ibdev, rdev);
2070
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
2071
2072
/* User space */
2073
ibdev->uverbs_cmd_mask =
2074
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2075
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2076
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2077
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2078
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2079
(1ull << IB_USER_VERBS_CMD_REG_MR) |
2080
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2081
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2082
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2083
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2084
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2085
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2086
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2087
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2088
(1ull << IB_USER_VERBS_CMD_REREG_MR) |
2089
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2090
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2091
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2092
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2093
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2094
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2095
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
2096
(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2097
(1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
2098
(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
2099
(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
2100
2101
ibdev->uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
2102
ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ);
2103
2104
#define bnxt_re_ib_ah bnxt_re_ah
2105
#define bnxt_re_ib_cq bnxt_re_cq
2106
#define bnxt_re_ib_pd bnxt_re_pd
2107
#define bnxt_re_ib_srq bnxt_re_srq
2108
#define bnxt_re_ib_ucontext bnxt_re_ucontext
2109
INIT_IB_DEVICE_OPS(&ibdev->ops, bnxt_re, BNXT_RE);
2110
2111
ibdev->query_device = bnxt_re_query_device;
2112
ibdev->modify_device = bnxt_re_modify_device;
2113
ibdev->query_port = bnxt_re_query_port;
2114
ibdev->modify_port = bnxt_re_modify_port;
2115
ibdev->get_port_immutable = bnxt_re_get_port_immutable;
2116
ibdev->query_pkey = bnxt_re_query_pkey;
2117
ibdev->query_gid = bnxt_re_query_gid;
2118
ibdev->get_netdev = bnxt_re_get_netdev;
2119
ibdev->add_gid = bnxt_re_add_gid;
2120
ibdev->del_gid = bnxt_re_del_gid;
2121
ibdev->get_link_layer = bnxt_re_get_link_layer;
2122
ibdev->alloc_pd = bnxt_re_alloc_pd;
2123
ibdev->dealloc_pd = bnxt_re_dealloc_pd;
2124
ibdev->create_ah = bnxt_re_create_ah;
2125
ibdev->modify_ah = bnxt_re_modify_ah;
2126
ibdev->query_ah = bnxt_re_query_ah;
2127
ibdev->destroy_ah = bnxt_re_destroy_ah;
2128
ibdev->create_srq = bnxt_re_create_srq;
2129
ibdev->modify_srq = bnxt_re_modify_srq;
2130
ibdev->query_srq = bnxt_re_query_srq;
2131
ibdev->destroy_srq = bnxt_re_destroy_srq;
2132
ibdev->post_srq_recv = bnxt_re_post_srq_recv;
2133
ibdev->create_qp = bnxt_re_create_qp;
2134
ibdev->modify_qp = bnxt_re_modify_qp;
2135
ibdev->query_qp = bnxt_re_query_qp;
2136
ibdev->destroy_qp = bnxt_re_destroy_qp;
2137
ibdev->post_send = bnxt_re_post_send;
2138
ibdev->post_recv = bnxt_re_post_recv;
2139
ibdev->create_cq = bnxt_re_create_cq;
2140
ibdev->modify_cq = bnxt_re_modify_cq;
2141
ibdev->destroy_cq = bnxt_re_destroy_cq;
2142
ibdev->resize_cq = bnxt_re_resize_cq;
2143
ibdev->poll_cq = bnxt_re_poll_cq;
2144
ibdev->req_notify_cq = bnxt_re_req_notify_cq;
2145
ibdev->get_dma_mr = bnxt_re_get_dma_mr;
2146
ibdev->get_hw_stats = bnxt_re_get_hw_stats;
2147
ibdev->alloc_hw_stats = bnxt_re_alloc_hw_port_stats;
2148
ibdev->dereg_mr = bnxt_re_dereg_mr;
2149
ibdev->alloc_mr = bnxt_re_alloc_mr;
2150
ibdev->map_mr_sg = bnxt_re_map_mr_sg;
2151
ibdev->alloc_mw = bnxt_re_alloc_mw;
2152
ibdev->dealloc_mw = bnxt_re_dealloc_mw;
2153
ibdev->reg_user_mr = bnxt_re_reg_user_mr;
2154
ibdev->rereg_user_mr = bnxt_re_rereg_user_mr;
2155
ibdev->disassociate_ucontext = bnxt_re_disassociate_ucntx;
2156
ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
2157
ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
2158
ibdev->mmap = bnxt_re_mmap;
2159
ibdev->process_mad = bnxt_re_process_mad;
2160
2161
ret = ib_register_device_compat(rdev);
2162
return ret;
2163
}
2164
2165
static void bnxt_re_dev_dealloc(struct bnxt_re_dev *rdev)
2166
{
2167
int i = BNXT_RE_REF_WAIT_COUNT;
2168
2169
dev_dbg(rdev_to_dev(rdev), "%s:Remove the device %p\n", __func__, rdev);
2170
/* Wait for rdev refcount to come down */
2171
while ((atomic_read(&rdev->ref_count) > 1) && i--)
2172
msleep(100);
2173
2174
if (atomic_read(&rdev->ref_count) > 1)
2175
dev_err(rdev_to_dev(rdev),
2176
"Failed waiting for ref count to deplete %d",
2177
atomic_read(&rdev->ref_count));
2178
2179
atomic_set(&rdev->ref_count, 0);
2180
if_rele(rdev->netdev);
2181
rdev->netdev = NULL;
2182
synchronize_rcu();
2183
2184
kfree(rdev->gid_map);
2185
kfree(rdev->dbg_stats);
2186
ib_dealloc_device(&rdev->ibdev);
2187
}
2188
2189
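/*
 * Allocate a bnxt_re_dev instance for the given netdev/en_dev pair and
 * initialize its locks, default parameters, DBR pacing work items, the
 * GID map and the debug stats memory.
 */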
static struct bnxt_re_dev *bnxt_re_dev_alloc(struct ifnet *netdev,
2190
struct bnxt_en_dev *en_dev)
2191
{
2192
struct bnxt_re_dev *rdev;
2193
u32 count;
2194
2195
/* Allocate bnxt_re_dev instance here */
2196
rdev = (struct bnxt_re_dev *)compat_ib_alloc_device(sizeof(*rdev));
2197
if (!rdev) {
2198
pr_err("%s: bnxt_re_dev allocation failure!",
2199
ROCE_DRV_MODULE_NAME);
2200
return NULL;
2201
}
2202
/* Default values */
2203
atomic_set(&rdev->ref_count, 0);
2204
rdev->netdev = netdev;
2205
dev_hold(rdev->netdev);
2206
rdev->en_dev = en_dev;
2207
rdev->id = rdev->en_dev->pdev->devfn;
2208
INIT_LIST_HEAD(&rdev->qp_list);
2209
mutex_init(&rdev->qp_lock);
2210
mutex_init(&rdev->cc_lock);
2211
mutex_init(&rdev->dbq_lock);
2212
bnxt_re_clear_rsors_stat(&rdev->stats.rsors);
2213
rdev->cosq[0] = rdev->cosq[1] = 0xFFFF;
2214
rdev->min_tx_depth = 1;
2215
rdev->stats.stats_query_sec = 1;
2216
/* Disable priority vlan as the default mode is DSCP based PFC */
2217
rdev->cc_param.disable_prio_vlan_tx = 1;
2218
2219
/* Initialize worker for DBR Pacing */
2220
INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
2221
INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
2222
rdev->gid_map = kzalloc(sizeof(*(rdev->gid_map)) *
2223
BNXT_RE_MAX_SGID_ENTRIES,
2224
GFP_KERNEL);
2225
if (!rdev->gid_map) {
2226
ib_dealloc_device(&rdev->ibdev);
2227
return NULL;
2228
}
2229
for(count = 0; count < BNXT_RE_MAX_SGID_ENTRIES; count++)
2230
rdev->gid_map[count] = -1;
2231
2232
rdev->dbg_stats = kzalloc(sizeof(*rdev->dbg_stats), GFP_KERNEL);
2233
if (!rdev->dbg_stats) {
2234
ib_dealloc_device(&rdev->ibdev);
2235
return NULL;
2236
}
2237
2238
return rdev;
2239
}
2240
2241
static int bnxt_re_handle_unaffi_async_event(
2242
struct creq_func_event *unaffi_async)
2243
{
2244
switch (unaffi_async->event) {
2245
case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
2246
case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
2247
case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
2248
case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
2249
case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
2250
case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
2251
case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
2252
case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
2253
case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
2254
case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
2255
case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
2256
break;
2257
default:
2258
return -EINVAL;
2259
}
2260
return 0;
2261
}
2262
2263
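/*
 * Translate a firmware QP error notification into an ib_event (access
 * error, requestor error or fatal), add kernel QPs already in the
 * error state to the flush list and invoke the QP's event handler.
 */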
static int bnxt_re_handle_qp_async_event(void *qp_event, struct bnxt_re_qp *qp)
2264
{
2265
struct creq_qp_error_notification *err_event;
2266
struct ib_event event;
2267
unsigned int flags;
2268
2269
if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
2270
!qp->qplib_qp.is_user) {
2271
flags = bnxt_re_lock_cqs(qp);
2272
bnxt_qplib_add_flush_qp(&qp->qplib_qp);
2273
bnxt_re_unlock_cqs(qp, flags);
2274
}
2275
memset(&event, 0, sizeof(event));
2276
event.device = &qp->rdev->ibdev;
2277
event.element.qp = &qp->ib_qp;
2278
event.event = IB_EVENT_QP_FATAL;
2279
2280
err_event = qp_event;
2281
switch(err_event->res_err_state_reason) {
2282
case CFCQ_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
2283
case CFCQ_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
2284
case CFCQ_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
2285
case CFCQ_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
2286
case CFCQ_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
2287
case CFCQ_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
2288
case CFCQ_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
2289
case CFCQ_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
2290
case CFCQ_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
2291
case CFCQ_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
2292
case CFCQ_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
2293
case CFCQ_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
2294
case CFCQ_RES_ERR_STATE_REASON_RES_IVALID_DUP_RKEY:
2295
case CFCQ_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
2296
event.event = IB_EVENT_QP_ACCESS_ERR;
2297
break;
2298
case CFCQ_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
2299
case CFCQ_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
2300
case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
2301
case CFCQ_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
2302
case CFCQ_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
2303
event.event = IB_EVENT_QP_REQ_ERR;
2304
break;
2305
case CFCQ_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
2306
case CFCQ_RES_ERR_STATE_REASON_RES_CMP_ERROR:
2307
case CFCQ_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
2308
case CFCQ_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
2309
case CFCQ_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
2310
case CFCQ_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
2311
case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
2312
event.event = IB_EVENT_QP_FATAL;
2313
break;
2314
default:
2315
if (qp->qplib_qp.srq)
2316
event.event = IB_EVENT_QP_LAST_WQE_REACHED;
2317
break;
2318
}
2319
2320
if (err_event->res_err_state_reason)
2321
dev_err(rdev_to_dev(qp->rdev),
2322
"%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
2323
__func__, qp->qplib_qp.is_user ? "user" : "kernel",
2324
qp->qplib_qp.id,
2325
err_event->sq_cons_idx,
2326
err_event->rq_cons_idx,
2327
err_event->req_slow_path_state,
2328
err_event->req_err_state_reason,
2329
err_event->res_slow_path_state,
2330
err_event->res_err_state_reason);
2331
2332
if (event.device && qp->ib_qp.event_handler)
2333
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
2334
2335
return 0;
2336
}
2337
2338
static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
2339
{
2340
struct creq_cq_error_notification *cqerr;
2341
bool send = false;
2342
2343
cqerr = event;
2344
switch (cqerr->cq_err_reason) {
2345
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
2346
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
2347
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
2348
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
2349
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
2350
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
2351
send = true;
2352
default:
2353
break;
2354
}
2355
2356
if (send && cq->ibcq.event_handler) {
2357
struct ib_event ibevent = {};
2358
2359
ibevent.event = IB_EVENT_CQ_ERR;
2360
ibevent.element.cq = &cq->ibcq;
2361
ibevent.device = &cq->rdev->ibdev;
2362
2363
dev_err(rdev_to_dev(cq->rdev),
2364
"%s err reason %d\n", __func__, cqerr->cq_err_reason);
2365
cq->ibcq.event_handler(&ibevent, cq->ibcq.cq_context);
2366
}
2367
2368
cq->qplib_cq.is_cq_err_event = true;
2369
2370
return 0;
2371
}
2372
2373
static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
2374
void *obj)
2375
{
2376
struct bnxt_qplib_qp *qplqp;
2377
struct bnxt_qplib_cq *qplcq;
2378
struct bnxt_re_qp *qp;
2379
struct bnxt_re_cq *cq;
2380
int rc = 0;
2381
u8 event;
2382
2383
if (!obj)
2384
return rc; /* QP was already dead, still return success */
2385
2386
event = affi_async->event;
2387
switch (event) {
2388
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
2389
qplqp = obj;
2390
qp = container_of(qplqp, struct bnxt_re_qp, qplib_qp);
2391
rc = bnxt_re_handle_qp_async_event(affi_async, qp);
2392
break;
2393
case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
2394
qplcq = obj;
2395
cq = container_of(qplcq, struct bnxt_re_cq, qplib_cq);
2396
rc = bnxt_re_handle_cq_async_error(affi_async, cq);
2397
break;
2398
default:
2399
rc = -EINVAL;
2400
}
2401
2402
return rc;
2403
}
2404
2405
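/*
 * AEQ dispatcher: function-level (unaffiliated) events are handled
 * directly, while QP/CQ (affiliated) error notifications are routed to
 * the per-object handlers above.
 */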
static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
2406
void *aeqe, void *obj)
2407
{
2408
struct creq_func_event *unaffi_async;
2409
struct creq_qp_event *affi_async;
2410
u8 type;
2411
int rc;
2412
2413
type = ((struct creq_base *)aeqe)->type;
2414
if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
2415
unaffi_async = aeqe;
2416
rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
2417
} else {
2418
affi_async = aeqe;
2419
rc = bnxt_re_handle_affi_async_event(affi_async, obj);
2420
}
2421
2422
return rc;
2423
}
2424
2425
static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
2426
struct bnxt_qplib_srq *handle, u8 event)
2427
{
2428
struct bnxt_re_srq *srq = to_bnxt_re(handle, struct bnxt_re_srq,
2429
qplib_srq);
2430
struct ib_event ib_event;
2431
2432
if (srq == NULL) {
2433
pr_err("%s: SRQ is NULL, SRQN not handled",
2434
ROCE_DRV_MODULE_NAME);
2435
return -EINVAL;
2436
}
2437
ib_event.device = &srq->rdev->ibdev;
2438
ib_event.element.srq = &srq->ibsrq;
2439
if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
2440
ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2441
else
2442
ib_event.event = IB_EVENT_SRQ_ERR;
2443
2444
if (srq->ibsrq.event_handler) {
2445
/* Lock event_handler? */
2446
(*srq->ibsrq.event_handler)(&ib_event,
2447
srq->ibsrq.srq_context);
2448
}
2449
return 0;
2450
}
2451
2452
static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
2453
struct bnxt_qplib_cq *handle)
2454
{
2455
struct bnxt_re_cq *cq = to_bnxt_re(handle, struct bnxt_re_cq,
2456
qplib_cq);
2457
u32 *cq_ptr;
2458
2459
if (cq == NULL) {
2460
pr_err("%s: CQ is NULL, CQN not handled",
2461
ROCE_DRV_MODULE_NAME);
2462
return -EINVAL;
2463
}
2464
/* CQ already in destroy path. Do not handle any more events */
2465
if (handle->destroyed || !atomic_read(&cq->ibcq.usecnt)) {
2466
if (!handle->destroyed)
2467
dev_dbg(NULL, "%s: CQ being destroyed, CQN not handled",
2468
ROCE_DRV_MODULE_NAME);
2469
return 0;
2470
}
2471
2472
if (cq->ibcq.comp_handler) {
2473
if (cq->uctx_cq_page) {
2474
cq_ptr = (u32 *)cq->uctx_cq_page;
2475
*cq_ptr = cq->qplib_cq.toggle;
2476
}
2477
/* Lock comp_handler? */
2478
(*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context);
2479
}
2480
2481
return 0;
2482
}
2483
2484
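/*
 * Pick the least-loaded notification queue for a new consumer and bump
 * its load count; bnxt_re_put_nq() drops the count again.
 */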
struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
2485
{
2486
int min, indx;
2487
2488
mutex_lock(&rdev->nqr.load_lock);
2489
for (indx = 0, min = 0; indx < (rdev->nqr.num_msix - 1); indx++) {
2490
if (rdev->nqr.nq[min].load > rdev->nqr.nq[indx].load)
2491
min = indx;
2492
}
2493
rdev->nqr.nq[min].load++;
2494
mutex_unlock(&rdev->nqr.load_lock);
2495
2496
return &rdev->nqr.nq[min];
2497
}
2498
2499
void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
2500
{
2501
mutex_lock(&rdev->nqr.load_lock);
2502
nq->load--;
2503
mutex_unlock(&rdev->nqr.load_lock);
2504
}
2505
2506
static bool bnxt_re_check_min_attr(struct bnxt_re_dev *rdev)
2507
{
2508
struct bnxt_qplib_dev_attr *attr;
2509
bool rc = true;
2510
2511
attr = rdev->dev_attr;
2512
2513
if (!attr->max_cq || !attr->max_qp ||
2514
!attr->max_sgid || !attr->max_mr) {
2515
dev_err(rdev_to_dev(rdev),"Insufficient RoCE resources");
2516
dev_dbg(rdev_to_dev(rdev),
2517
"max_cq = %d, max_qp = %d, max_dpi = %d, max_sgid = %d, max_mr = %d",
2518
attr->max_cq, attr->max_qp, attr->max_dpi,
2519
attr->max_sgid, attr->max_mr);
2520
rc = false;
2521
}
2522
return rc;
2523
}
2524
2525
static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
2526
u8 port_num, enum ib_event_type event)
2527
{
2528
struct ib_event ib_event;
2529
2530
ib_event.device = ibdev;
2531
if (qp) {
2532
ib_event.element.qp = qp;
2533
ib_event.event = event;
2534
if (qp->event_handler)
2535
qp->event_handler(&ib_event, qp->qp_context);
2536
} else {
2537
ib_event.element.port_num = port_num;
2538
ib_event.event = event;
2539
ib_dispatch_event(&ib_event);
2540
}
2541
2542
dev_dbg(rdev_to_dev(to_bnxt_re_dev(ibdev, ibdev)),
2543
"ibdev %p Event 0x%x port_num 0x%x", ibdev, event, port_num);
2544
}
2545
2546
static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
2547
struct bnxt_re_qp *qp)
2548
{
2549
if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL)
2550
return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
2551
(qp == rdev->gsi_ctx.gsi_sqp);
2552
else
2553
return (qp->ib_qp.qp_type == IB_QPT_GSI);
2554
}
2555
2556
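/*
 * Move QPs to the error state: all QPs when the device is detached,
 * otherwise every QP except QP1 and the shadow QP. When detached, the
 * state is only updated in software and kernel QPs are put on the
 * flush list; otherwise a modify_qp is issued. The qp_lock is released
 * periodically so other verbs (e.g. destroy_qp) are not starved.
 */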
static void bnxt_re_stop_all_nonqp1_nonshadow_qps(struct bnxt_re_dev *rdev)
2557
{
2558
struct bnxt_qplib_qp *qpl_qp;
2559
bool dev_detached = false;
2560
struct ib_qp_attr qp_attr;
2561
int num_qps_stopped = 0;
2562
int mask = IB_QP_STATE;
2563
struct bnxt_re_qp *qp;
2564
unsigned long flags;
2565
2566
if (!rdev)
2567
return;
2568
2569
restart:
2570
if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
2571
dev_detached = true;
2572
2573
qp_attr.qp_state = IB_QPS_ERR;
2574
mutex_lock(&rdev->qp_lock);
2575
list_for_each_entry(qp, &rdev->qp_list, list) {
2576
qpl_qp = &qp->qplib_qp;
2577
if (dev_detached || !bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
2578
if (qpl_qp->state !=
2579
CMDQ_MODIFY_QP_NEW_STATE_RESET &&
2580
qpl_qp->state !=
2581
CMDQ_MODIFY_QP_NEW_STATE_ERR) {
2582
if (dev_detached) {
2583
/*
2584
* Can't actually send the command down,
2585
* marking the state for bookkeeping
2586
*/
2587
qpl_qp->state =
2588
CMDQ_MODIFY_QP_NEW_STATE_ERR;
2589
qpl_qp->cur_qp_state = qpl_qp->state;
2590
if (!qpl_qp->is_user) {
2591
/* Add to flush list */
2592
flags = bnxt_re_lock_cqs(qp);
2593
bnxt_qplib_add_flush_qp(qpl_qp);
2594
bnxt_re_unlock_cqs(qp, flags);
2595
}
2596
} else {
2597
num_qps_stopped++;
2598
bnxt_re_modify_qp(&qp->ib_qp,
2599
&qp_attr, mask,
2600
NULL);
2601
}
2602
2603
bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
2604
1, IB_EVENT_QP_FATAL);
2605
/*
2606
* 1. Release qp_lock after a budget to unblock other verb
2607
* requests (like qp_destroy) from stack.
2608
* 2. Traverse through the qp_list freshly as addition / deletion
2609
* might have happened since qp_lock is getting released here.
2610
*/
2611
if (num_qps_stopped % BNXT_RE_STOP_QPS_BUDGET == 0) {
2612
mutex_unlock(&rdev->qp_lock);
2613
goto restart;
2614
}
2615
}
2616
}
2617
}
2618
2619
mutex_unlock(&rdev->qp_lock);
2620
}
2621
2622
static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
2623
{
2624
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
2625
struct bnxt_qplib_gid gid;
2626
u16 gid_idx, index;
2627
int rc = 0;
2628
2629
if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
2630
return 0;
2631
2632
if (sgid_tbl == NULL) {
2633
dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated");
2634
return -EINVAL;
2635
}
2636
2637
for (index = 0; index < sgid_tbl->active; index++) {
2638
gid_idx = sgid_tbl->hw_id[index];
2639
2640
if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
2641
sizeof(bnxt_qplib_gid_zero)))
2642
continue;
2643
/* Only the VLAN enable setting of non-VLAN GIDs needs modifying,
2644
* since it is already set for VLAN GIDs when the GID is added.
2645
*
2646
* If disable_prio_vlan_tx is enabled, then we'll need to remove the
2647
* vlan entry from the sgid_tbl.
2648
*/
2649
if (sgid_tbl->vlan[index] == true)
2650
continue;
2651
2652
memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
2653
2654
rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
2655
rdev->dev_addr);
2656
}
2657
2658
return rc;
2659
}
2660
2661
static void bnxt_re_clear_cc(struct bnxt_re_dev *rdev)
2662
{
2663
struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param;
2664
2665
if (_is_chip_p7(rdev->chip_ctx)) {
2666
cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
2667
} else {
2668
cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
2669
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
2670
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
2671
2672
if (!is_qport_service_type_supported(rdev))
2673
cc_param->mask |=
2674
(CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP |
2675
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP |
2676
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP);
2677
}
2678
2679
cc_param->cur_mask = cc_param->mask;
2680
2681
if (bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param))
2682
dev_err(rdev_to_dev(rdev), "Failed to modify cc\n");
2683
}
2684
2685
static int bnxt_re_setup_cc(struct bnxt_re_dev *rdev)
2686
{
2687
struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param;
2688
int rc;
2689
2690
if (_is_chip_p7(rdev->chip_ctx)) {
2691
cc_param->enable = 0x0;
2692
cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP;
2693
} else {
2694
cc_param->enable = 0x1;
2695
cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE |
2696
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
2697
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
2698
2699
if (!is_qport_service_type_supported(rdev))
2700
cc_param->mask |=
2701
(CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP |
2702
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP |
2703
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP);
2704
}
2705
2706
cc_param->cur_mask = cc_param->mask;
2707
2708
rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param);
2709
if (rc) {
2710
dev_err(rdev_to_dev(rdev), "Failed to modify cc\n");
2711
return rc;
2712
}
2713
/* Reset the programming mask */
2714
cc_param->mask = 0;
2715
if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
2716
cc_param->qp1_tos_dscp = cc_param->tos_dscp;
2717
rc = bnxt_re_update_qp1_tos_dscp(rdev);
2718
if (rc) {
2719
dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1:%d",
2720
__func__, rc);
2721
goto clear;
2722
}
2723
}
2724
return 0;
2725
2726
clear:
2727
bnxt_re_clear_cc(rdev);
2728
return rc;
2729
}
2730
2731
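/*
 * Read the DSCP-to-priority mapping from firmware into a DMA buffer
 * and copy up to *count entries to the caller; *count is updated to
 * the number of entries firmware reports.
 */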
int bnxt_re_query_hwrm_dscp2pri(struct bnxt_re_dev *rdev,
2732
struct bnxt_re_dscp2pri *d2p, u16 *count,
2733
u16 target_id)
2734
{
2735
struct bnxt_en_dev *en_dev = rdev->en_dev;
2736
struct hwrm_queue_dscp2pri_qcfg_input req;
2737
struct hwrm_queue_dscp2pri_qcfg_output resp;
2738
struct bnxt_re_dscp2pri *dscp2pri;
2739
struct bnxt_fw_msg fw_msg;
2740
u16 in_count = *count;
2741
dma_addr_t dma_handle;
2742
int rc = 0, i;
2743
u16 data_len;
2744
u8 *kmem;
2745
2746
data_len = *count * sizeof(*dscp2pri);
2747
memset(&fw_msg, 0, sizeof(fw_msg));
2748
memset(&req, 0, sizeof(req));
2749
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2750
HWRM_QUEUE_DSCP2PRI_QCFG, -1, target_id);
2751
req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2752
2753
kmem = dma_zalloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle,
2754
GFP_KERNEL);
2755
if (!kmem) {
2756
dev_err(rdev_to_dev(rdev),
2757
"dma_zalloc_coherent failure, length = %u\n",
2758
(unsigned)data_len);
2759
return -ENOMEM;
2760
}
2761
req.dest_data_addr = cpu_to_le64(dma_handle);
2762
req.dest_data_buffer_size = cpu_to_le16(data_len);
2763
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2764
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2765
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2766
if (rc)
2767
goto out;
2768
2769
/* Upload the DSCP-MASK-PRI tuple(s) */
2770
dscp2pri = (struct bnxt_re_dscp2pri *)kmem;
2771
for (i = 0; i < le16_to_cpu(resp.entry_cnt) && i < in_count; i++) {
2772
d2p[i].dscp = dscp2pri->dscp;
2773
d2p[i].mask = dscp2pri->mask;
2774
d2p[i].pri = dscp2pri->pri;
2775
dscp2pri++;
2776
}
2777
*count = le16_to_cpu(resp.entry_cnt);
2778
out:
2779
dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle);
2780
return rc;
2781
}
2782
2783
int bnxt_re_prio_vlan_tx_update(struct bnxt_re_dev *rdev)
2784
{
2785
/* Remove the VLAN from the GID entry */
2786
if (rdev->cc_param.disable_prio_vlan_tx)
2787
rdev->qplib_res.prio = false;
2788
else
2789
rdev->qplib_res.prio = true;
2790
2791
return bnxt_re_update_gid(rdev);
2792
}
2793
2794
int bnxt_re_set_hwrm_dscp2pri(struct bnxt_re_dev *rdev,
2795
struct bnxt_re_dscp2pri *d2p, u16 count,
2796
u16 target_id)
2797
{
2798
struct bnxt_en_dev *en_dev = rdev->en_dev;
2799
struct hwrm_queue_dscp2pri_cfg_input req;
2800
struct hwrm_queue_dscp2pri_cfg_output resp;
2801
struct bnxt_fw_msg fw_msg;
2802
struct bnxt_re_dscp2pri *dscp2pri;
2803
int i, rc, data_len = 3 * 256;
2804
dma_addr_t dma_handle;
2805
u8 *kmem;
2806
2807
memset(&req, 0, sizeof(req));
2808
memset(&fw_msg, 0, sizeof(fw_msg));
2809
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2810
HWRM_QUEUE_DSCP2PRI_CFG, -1, target_id);
2811
req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2812
2813
kmem = dma_alloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle,
2814
GFP_KERNEL);
2815
if (!kmem) {
2816
dev_err(rdev_to_dev(rdev),
2817
"dma_alloc_coherent failure, length = %u\n",
2818
(unsigned)data_len);
2819
return -ENOMEM;
2820
}
2821
req.src_data_addr = cpu_to_le64(dma_handle);
2822
2823
/* Download the DSCP-MASK-PRI tuple(s) */
2824
dscp2pri = (struct bnxt_re_dscp2pri *)kmem;
2825
for (i = 0; i < count; i++) {
2826
dscp2pri->dscp = d2p[i].dscp;
2827
dscp2pri->mask = d2p[i].mask;
2828
dscp2pri->pri = d2p[i].pri;
2829
dscp2pri++;
2830
}
2831
2832
req.entry_cnt = cpu_to_le16(count);
2833
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2834
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2835
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2836
dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle);
2837
return rc;
2838
}
2839
2840
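/*
 * Query the port's queue configuration and record the maximum number
 * of configurable traffic classes plus which TC and CoS queue id are
 * used for RoCE, for CNP and as the default class.
 */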
int bnxt_re_query_hwrm_qportcfg(struct bnxt_re_dev *rdev,
2841
struct bnxt_re_tc_rec *tc_rec, u16 tid)
2842
{
2843
u8 max_tc, tc, *qptr, *type_ptr0, *type_ptr1;
2844
struct hwrm_queue_qportcfg_output resp = {0};
2845
struct hwrm_queue_qportcfg_input req = {0};
2846
struct bnxt_en_dev *en_dev = rdev->en_dev;
2847
struct bnxt_fw_msg fw_msg;
2848
bool def_init = false;
2849
u8 *tmp_type;
2850
u8 cos_id;
2851
int rc;
2852
2853
memset(&fw_msg, 0, sizeof(fw_msg));
2854
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_QPORTCFG,
2855
-1, tid);
2856
req.port_id = (tid == 0xFFFF) ? en_dev->pf_port_id : 1;
2857
if (BNXT_EN_ASYM_Q(en_dev))
2858
req.flags = htole32(HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX);
2859
2860
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2861
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2862
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2863
if (rc)
2864
return rc;
2865
2866
if (!resp.max_configurable_queues)
2867
return -EINVAL;
2868
2869
max_tc = resp.max_configurable_queues;
2870
tc_rec->max_tc = max_tc;
2871
2872
if (resp.queue_cfg_info & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_USE_PROFILE_TYPE)
2873
tc_rec->serv_type_enabled = true;
2874
2875
qptr = &resp.queue_id0;
2876
type_ptr0 = &resp.queue_id0_service_profile_type;
2877
type_ptr1 = &resp.queue_id1_service_profile_type;
2878
for (tc = 0; tc < max_tc; tc++) {
2879
tmp_type = tc ? type_ptr1 + (tc - 1) : type_ptr0;
2880
2881
cos_id = *qptr++;
2882
/* RoCE CoS queue is the first cos queue.
2883
* For MP12 and MP17 order is 405 and 141015.
2884
*/
2885
if (is_bnxt_roce_queue(rdev, *qptr, *tmp_type)) {
2886
tc_rec->cos_id_roce = cos_id;
2887
tc_rec->tc_roce = tc;
2888
} else if (is_bnxt_cnp_queue(rdev, *qptr, *tmp_type)) {
2889
tc_rec->cos_id_cnp = cos_id;
2890
tc_rec->tc_cnp = tc;
2891
} else if (!def_init) {
2892
def_init = true;
2893
tc_rec->tc_def = tc;
2894
tc_rec->cos_id_def = cos_id;
2895
}
2896
qptr++;
2897
}
2898
2899
return rc;
2900
}
2901
2902
int bnxt_re_hwrm_cos2bw_qcfg(struct bnxt_re_dev *rdev, u16 target_id,
2903
struct bnxt_re_cos2bw_cfg *cfg)
2904
{
2905
struct bnxt_en_dev *en_dev = rdev->en_dev;
2906
struct hwrm_queue_cos2bw_qcfg_output resp;
2907
struct hwrm_queue_cos2bw_qcfg_input req = {0};
2908
struct bnxt_fw_msg fw_msg;
2909
int rc, indx;
2910
void *data;
2911
2912
memset(&fw_msg, 0, sizeof(fw_msg));
2913
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2914
HWRM_QUEUE_COS2BW_QCFG, -1, target_id);
2915
req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2916
2917
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2918
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2919
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2920
if (rc)
2921
return rc;
2922
data = &resp.queue_id0 + offsetof(struct bnxt_re_cos2bw_cfg,
2923
queue_id);
2924
for (indx = 0; indx < 8; indx++, data += (sizeof(cfg->cfg))) {
2925
memcpy(&cfg->cfg, data, sizeof(cfg->cfg));
2926
if (indx == 0)
2927
cfg->queue_id = resp.queue_id0;
2928
cfg++;
2929
}
2930
2931
return rc;
2932
}
2933
2934
int bnxt_re_hwrm_cos2bw_cfg(struct bnxt_re_dev *rdev, u16 target_id,
2935
struct bnxt_re_cos2bw_cfg *cfg)
2936
{
2937
struct bnxt_en_dev *en_dev = rdev->en_dev;
2938
struct hwrm_queue_cos2bw_cfg_input req = {0};
2939
struct hwrm_queue_cos2bw_cfg_output resp = {0};
2940
struct bnxt_fw_msg fw_msg;
2941
void *data;
2942
int indx;
2943
int rc;
2944
2945
memset(&fw_msg, 0, sizeof(fw_msg));
2946
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2947
HWRM_QUEUE_COS2BW_CFG, -1, target_id);
2948
req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1;
2949
2950
/* The Chimp firmware wants the enable bit set to retain the previous
2951
* config done by the L2 driver.
2952
*/
2953
for (indx = 0; indx < 8; indx++) {
2954
if (cfg[indx].queue_id < 40) {
2955
req.enables |= cpu_to_le32(
2956
HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID <<
2957
indx);
2958
}
2959
2960
data = (char *)&req.unused_0 + indx * (sizeof(*cfg) - 4);
2961
memcpy(data, &cfg[indx].queue_id, sizeof(*cfg) - 4);
2962
if (indx == 0) {
2963
req.queue_id0 = cfg[0].queue_id;
2964
req.unused_0 = 0;
2965
}
2966
}
2967
2968
memset(&resp, 0, sizeof(resp));
2969
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
2970
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
2971
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
2972
return rc;
2973
}
2974
2975
int bnxt_re_host_pf_id_query(struct bnxt_re_dev *rdev,
2976
struct bnxt_qplib_query_fn_info *fn_info,
2977
u32 *pf_mask, u32 *first_pf)
2978
{
2979
struct hwrm_func_host_pf_ids_query_output resp = {0};
2980
struct hwrm_func_host_pf_ids_query_input req;
2981
struct bnxt_en_dev *en_dev = rdev->en_dev;
2982
struct bnxt_fw_msg fw_msg;
2983
int rc;
2984
2985
memset(&fw_msg, 0, sizeof(fw_msg));
2986
memset(&req, 0, sizeof(req));
2987
bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
2988
HWRM_FUNC_HOST_PF_IDS_QUERY, -1, -1);
2989
/* To query the info from the host EPs */
2990
switch (fn_info->host) {
2991
case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_SOC:
2992
case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0:
2993
case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_1:
2994
case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_2:
2995
case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_3:
2996
req.host = fn_info->host;
2997
break;
2998
default:
2999
req.host = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0;
3000
break;
3001
}
3002
3003
req.filter = fn_info->filter;
3004
if (req.filter > HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ROCE)
3005
req.filter = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ALL;
3006
3007
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
3008
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
3009
rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
3010
3011
3012
*first_pf = le16_to_cpu(resp.first_pf_id);
3013
*pf_mask = le16_to_cpu(resp.pf_ordinal_mask);
3014
3015
return rc;
3016
}
3017
3018
static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev)
3019
{
3020
struct bnxt_qplib_ctx *hctx;
3021
struct bnxt_qplib_res *res;
3022
u16 tid = 0xffff;
3023
3024
res = &rdev->qplib_res;
3025
hctx = res->hctx;
3026
3027
if (test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags)) {
3028
bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id, tid);
3029
bnxt_qplib_free_stat_mem(res, &hctx->stats);
3030
}
3031
}
3032
3033
static void bnxt_re_put_stats2_ctx(struct bnxt_re_dev *rdev)
3034
{
3035
test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX2_ALLOC, &rdev->flags);
3036
}
3037
3038
static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev)
3039
{
3040
struct bnxt_qplib_ctx *hctx;
3041
struct bnxt_qplib_res *res;
3042
u16 tid = 0xffff;
3043
int rc;
3044
3045
res = &rdev->qplib_res;
3046
hctx = res->hctx;
3047
3048
rc = bnxt_qplib_alloc_stat_mem(res->pdev, rdev->chip_ctx, &hctx->stats);
3049
if (rc)
3050
return -ENOMEM;
3051
rc = bnxt_re_net_stats_ctx_alloc(rdev, tid);
3052
if (rc)
3053
goto free_stat_mem;
3054
set_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags);
3055
3056
return 0;
3057
3058
free_stat_mem:
3059
bnxt_qplib_free_stat_mem(res, &hctx->stats);
3060
3061
return rc;
3062
}
3063
3064
static int bnxt_re_update_dev_attr(struct bnxt_re_dev *rdev)
3065
{
3066
int rc;
3067
3068
rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
3069
if (rc)
3070
return rc;
3071
if (!bnxt_re_check_min_attr(rdev))
3072
return -EINVAL;
3073
return 0;
3074
}
3075
3076
static void bnxt_re_free_tbls(struct bnxt_re_dev *rdev)
3077
{
3078
bnxt_qplib_clear_tbls(&rdev->qplib_res);
3079
bnxt_qplib_free_tbls(&rdev->qplib_res);
3080
}
3081
3082
static int bnxt_re_alloc_init_tbls(struct bnxt_re_dev *rdev)
3083
{
3084
struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
3085
u8 pppp_factor = 0;
3086
int rc;
3087
3088
/*
3089
* TODO: Need a better mechanism for spreading of the
3090
* 512 extended PPP pages. For now, spreading it
3091
* based on port_count
3092
*/
3093
if (_is_chip_p7(chip_ctx) && chip_ctx->modes.db_push)
3094
pppp_factor = rdev->en_dev->port_count;
3095
rc = bnxt_qplib_alloc_tbls(&rdev->qplib_res, pppp_factor);
3096
if (rc)
3097
return rc;
3098
bnxt_qplib_init_tbls(&rdev->qplib_res);
3099
set_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags);
3100
3101
return 0;
3102
}
3103
3104
static void bnxt_re_clean_nqs(struct bnxt_re_dev *rdev)
3105
{
3106
struct bnxt_qplib_nq *nq;
3107
int i;
3108
3109
if (!rdev->nqr.max_init)
3110
return;
3111
3112
for (i = (rdev->nqr.max_init - 1) ; i >= 0; i--) {
3113
nq = &rdev->nqr.nq[i];
3114
bnxt_qplib_disable_nq(nq);
3115
bnxt_re_net_ring_free(rdev, nq->ring_id);
3116
bnxt_qplib_free_nq_mem(nq);
3117
}
3118
rdev->nqr.max_init = 0;
3119
}
3120
3121
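/*
 * Allocate, ring-register and enable one notification queue per data
 * path MSI-X vector. DBR pacing capability is requested on the first
 * NQ only. On failure the already initialized NQs are left for
 * bnxt_re_clean_nqs() via nqr.max_init.
 */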
static int bnxt_re_setup_nqs(struct bnxt_re_dev *rdev)
3122
{
3123
struct bnxt_re_ring_attr rattr = {};
3124
struct bnxt_qplib_nq *nq;
3125
int rc, i;
3126
int depth;
3127
u32 offt;
3128
u16 vec;
3129
3130
mutex_init(&rdev->nqr.load_lock);
3131
/*
3132
* TODO: Optimize the depth based on the
3133
* number of NQs.
3134
*/
3135
depth = BNXT_QPLIB_NQE_MAX_CNT;
3136
for (i = 0; i < rdev->nqr.num_msix - 1; i++) {
3137
nq = &rdev->nqr.nq[i];
3138
vec = rdev->nqr.msix_entries[i + 1].vector;
3139
offt = rdev->nqr.msix_entries[i + 1].db_offset;
3140
nq->hwq.max_elements = depth;
3141
rc = bnxt_qplib_alloc_nq_mem(&rdev->qplib_res, nq);
3142
if (rc) {
3143
dev_err(rdev_to_dev(rdev),
3144
"Failed to get mem for NQ %d, rc = 0x%x",
3145
i, rc);
3146
goto fail_mem;
3147
}
3148
3149
rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
3150
rattr.pages = nq->hwq.pbl[rdev->nqr.nq[i].hwq.level].pg_count;
3151
rattr.type = bnxt_re_get_rtype(rdev);
3152
rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX;
3153
rattr.depth = nq->hwq.max_elements - 1;
3154
rattr.lrid = rdev->nqr.msix_entries[i + 1].ring_idx;
3155
3156
/* Set DBR pacing capability on the first NQ ring only */
3157
if (!i && bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx))
3158
rattr.flags = HWRM_RING_ALLOC_INPUT_FLAGS_NQ_DBR_PACING;
3159
else
3160
rattr.flags = 0;
3161
3162
rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
3163
if (rc) {
3164
nq->ring_id = 0xffff; /* Invalid ring-id */
3165
dev_err(rdev_to_dev(rdev),
3166
"Failed to get fw id for NQ %d, rc = 0x%x",
3167
i, rc);
3168
goto fail_ring;
3169
}
3170
3171
rc = bnxt_qplib_enable_nq(nq, i, vec, offt,
3172
&bnxt_re_cqn_handler,
3173
&bnxt_re_srqn_handler);
3174
if (rc) {
3175
dev_err(rdev_to_dev(rdev),
3176
"Failed to enable NQ %d, rc = 0x%x", i, rc);
3177
goto fail_en;
3178
}
3179
}
3180
3181
rdev->nqr.max_init = i;
3182
return 0;
3183
fail_en:
3184
/* nq still points to the i'th NQ, the one that failed to enable */
3185
bnxt_re_net_ring_free(rdev, nq->ring_id);
3186
fail_ring:
3187
bnxt_qplib_free_nq_mem(nq);
3188
fail_mem:
3189
rdev->nqr.max_init = i;
3190
return rc;
3191
}
3192
3193
static void bnxt_re_sysfs_destroy_file(struct bnxt_re_dev *rdev)
3194
{
3195
int i;
3196
3197
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
3198
device_remove_file(&rdev->ibdev.dev, bnxt_re_attributes[i]);
3199
}
3200
3201
static int bnxt_re_sysfs_create_file(struct bnxt_re_dev *rdev)
3202
{
3203
int i, j, rc = 0;
3204
3205
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
3206
rc = device_create_file(&rdev->ibdev.dev,
3207
bnxt_re_attributes[i]);
3208
if (rc) {
3209
dev_err(rdev_to_dev(rdev),
3210
"Failed to create IB sysfs with rc = 0x%x", rc);
3211
/* Must clean up all created device files */
3212
for (j = 0; j < i; j++)
3213
device_remove_file(&rdev->ibdev.dev,
3214
bnxt_re_attributes[j]);
3215
clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
3216
ib_unregister_device(&rdev->ibdev);
3217
return 1;
3218
}
3219
}
3220
return 0;
3221
}
3222
3223
/* worker thread for polling periodic events. Now used for QoS programming*/
3224
static void bnxt_re_worker(struct work_struct *work)
3225
{
3226
struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
3227
worker.work);
3228
int rc;
3229
3230
/* QoS is handled on a 30s cadence for PFs */
3231
if (!rdev->is_virtfn && !rdev->worker_30s--)
3232
rdev->worker_30s = 30;
3233
/* Use trylock for bnxt_re_dev_lock as this can be
3234
* held for a long time by the debugfs show path while issuing
3235
* HWRMs. If the debugfs name update is not done in this
3236
* iteration, the driver will check for the same in the
3237
* next schedule of the worker, i.e. after 1 sec.
3238
*/
3239
if (mutex_trylock(&bnxt_re_dev_lock))
3240
mutex_unlock(&bnxt_re_dev_lock);
3241
3242
if (!rdev->stats.stats_query_sec)
3243
goto resched;
3244
3245
if (test_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags) &&
3246
(rdev->is_virtfn ||
3247
!_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags))) {
3248
if (!(rdev->stats.stats_query_counter++ %
3249
rdev->stats.stats_query_sec)) {
3250
rc = bnxt_re_get_qos_stats(rdev);
3251
if (rc && rc != -ENOMEM)
3252
clear_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS,
3253
&rdev->flags);
3254
}
3255
}
3256
3257
resched:
3258
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000));
3259
}
3260
3261
static int bnxt_re_alloc_dbr_sw_stats_mem(struct bnxt_re_dev *rdev)
3262
{
3263
if (!(rdev->dbr_drop_recov || rdev->dbr_pacing))
3264
return 0;
3265
3266
rdev->dbr_sw_stats = kzalloc(sizeof(*rdev->dbr_sw_stats), GFP_KERNEL);
3267
if (!rdev->dbr_sw_stats)
3268
return -ENOMEM;
3269
3270
return 0;
3271
}
3272
3273
static void bnxt_re_free_dbr_sw_stats_mem(struct bnxt_re_dev *rdev)
3274
{
3275
kfree(rdev->dbr_sw_stats);
3276
rdev->dbr_sw_stats = NULL;
3277
}
3278
3279
static int bnxt_re_initialize_dbr_drop_recov(struct bnxt_re_dev *rdev)
3280
{
3281
rdev->dbr_drop_recov_wq =
3282
create_singlethread_workqueue("bnxt_re_dbr_drop_recov");
3283
if (!rdev->dbr_drop_recov_wq) {
3284
dev_err(rdev_to_dev(rdev), "DBR Drop Revov wq alloc failed!");
3285
return -EINVAL;
3286
}
3287
rdev->dbr_drop_recov = true;
3288
3289
/* Enable configfs setting dbr_drop_recov by default */
3290
rdev->user_dbr_drop_recov = true;
3291
3292
rdev->user_dbr_drop_recov_timeout = BNXT_RE_DBR_RECOV_USERLAND_TIMEOUT;
3293
return 0;
3294
}
3295
3296
static void bnxt_re_deinitialize_dbr_drop_recov(struct bnxt_re_dev *rdev)
3297
{
3298
if (rdev->dbr_drop_recov_wq) {
3299
flush_workqueue(rdev->dbr_drop_recov_wq);
3300
destroy_workqueue(rdev->dbr_drop_recov_wq);
3301
rdev->dbr_drop_recov_wq = NULL;
3302
}
3303
rdev->dbr_drop_recov = false;
3304
}
3305
3306
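/*
 * Set up doorbell pacing: allocate the shared pacing data page, query
 * the firmware pacing configuration, create the dbq workqueue, map GRC
 * window 2 onto the DB FIFO register and seed the default pacing
 * parameters.
 */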
static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
3307
{
3308
int rc;
3309
3310
/* Allocate a page for app use */
3311
rdev->dbr_page = (void *)__get_free_page(GFP_KERNEL);
3312
if (!rdev->dbr_page) {
3313
dev_err(rdev_to_dev(rdev), "DBR page allocation failed!");
3314
return -ENOMEM;
3315
}
3316
memset((u8 *)rdev->dbr_page, 0, PAGE_SIZE);
3317
rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->dbr_page;
3318
rc = bnxt_re_hwrm_dbr_pacing_qcfg(rdev);
3319
if (rc) {
3320
dev_err(rdev_to_dev(rdev),
3321
"Failed to query dbr pacing config %d\n", rc);
3322
goto fail;
3323
}
3324
/* Create a work queue for scheduling dbq event */
3325
rdev->dbq_wq = create_singlethread_workqueue("bnxt_re_dbq");
3326
if (!rdev->dbq_wq) {
3327
dev_err(rdev_to_dev(rdev), "DBQ wq alloc failed!");
3328
rc = -ENOMEM;
3329
goto fail;
3330
}
3331
/* Map GRC window 2 for reading the DB FIFO depth */
3332
writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4, 0,
3333
rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK);
3334
rdev->dbr_db_fifo_reg_off =
3335
(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
3336
0x2000;
3337
rdev->qplib_res.pacing_data->grc_reg_offset = rdev->dbr_db_fifo_reg_off;
3338
3339
rdev->dbr_bar_addr =
3340
pci_resource_start(rdev->qplib_res.pdev, 0) +
3341
rdev->dbr_db_fifo_reg_off;
3342
3343
/* Percentage of DB FIFO */
3344
rdev->dbq_watermark = BNXT_RE_PACING_DBQ_THRESHOLD;
3345
rdev->pacing_en_int_th = BNXT_RE_PACING_EN_INT_THRESHOLD;
3346
rdev->pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
3347
rdev->dbq_pacing_time = BNXT_RE_DBR_INT_TIME;
3348
rdev->dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
3349
rdev->do_pacing_save = rdev->dbr_def_do_pacing;
3350
bnxt_re_set_default_pacing_data(rdev);
3351
dev_dbg(rdev_to_dev(rdev), "Initialized db pacing\n");
3352
3353
return 0;
3354
fail:
3355
free_page((u64)rdev->dbr_page);
3356
rdev->dbr_page = NULL;
3357
return rc;
3358
}
3359
3360
static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
3361
{
3362
if (rdev->dbq_wq)
3363
flush_workqueue(rdev->dbq_wq);
3364
3365
cancel_work_sync(&rdev->dbq_fifo_check_work);
3366
cancel_delayed_work_sync(&rdev->dbq_pacing_work);
3367
3368
if (rdev->dbq_wq) {
3369
destroy_workqueue(rdev->dbq_wq);
3370
rdev->dbq_wq = NULL;
3371
}
3372
3373
if (rdev->dbr_page)
3374
free_page((u64)rdev->dbr_page);
3375
rdev->dbr_page = NULL;
3376
rdev->dbr_pacing = false;
3377
}
3378
3379
/* enable_dbr_pacing needs to be done only for older FWs
3380
* where the host selects the primary function, i.e. the pacing_ext
3381
* flag is not set.
3382
*/
3383
int bnxt_re_enable_dbr_pacing(struct bnxt_re_dev *rdev)
3384
{
3385
struct bnxt_qplib_nq *nq;
3386
3387
nq = &rdev->nqr.nq[0];
3388
rdev->dbq_nq_id = nq->ring_id;
3389
3390
if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
3391
bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
3392
if (bnxt_re_hwrm_dbr_pacing_cfg(rdev, true)) {
3393
dev_err(rdev_to_dev(rdev),
3394
"Failed to set dbr pacing config\n");
3395
return -EIO;
3396
}
3397
/* Map GRC window 8 for arming the NQ DBQ */
3398
writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28 , 0,
3399
rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK);
3400
rdev->dbr_aeq_arm_reg_off =
3401
(rdev->chip_ctx->dbr_aeq_arm_reg &
3402
BNXT_GRC_OFFSET_MASK) + 0x8000;
3403
writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off , 0, 1);
3404
}
3405
3406
return 0;
3407
}
3408
3409
/* disable_dbr_pacing needs to be done only for older FWs
3410
* where the host selects the primary function, i.e. the pacing_ext
3411
* flag is not set.
3412
*/
3413
3414
int bnxt_re_disable_dbr_pacing(struct bnxt_re_dev *rdev)
3415
{
3416
int rc = 0;
3417
3418
if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) &&
3419
bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx))
3420
rc = bnxt_re_hwrm_dbr_pacing_cfg(rdev, false);
3421
3422
return rc;
3423
}
3424
3425
static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
3426
{
3427
if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
3428
bnxt_re_sysfs_destroy_file(rdev);
3429
/* Cleanup ib dev */
3430
ib_unregister_device(&rdev->ibdev);
3431
clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
3432
return;
3433
}
3434
}
3435
3436
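/*
 * Tear down the device, roughly in the reverse order of
 * bnxt_re_dev_init(): async events, worker, DBR pacing/recovery, NQs,
 * tables, RCFW channel, stats contexts, MSI-X and finally the netdev
 * registration (unless this is a pre-recovery removal).
 */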
static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
3437
{
3438
struct bnxt_qplib_dpi *kdpi;
3439
int rc, wait_count = BNXT_RE_RES_FREE_WAIT_COUNT;
3440
3441
bnxt_re_net_unregister_async_event(rdev);
3442
3443
bnxt_re_put_stats2_ctx(rdev);
3444
if (test_and_clear_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED,
3445
&rdev->flags)) {
3446
/* did the caller hold the lock? */
3447
mutex_lock(&bnxt_re_dev_lock);
3448
list_del_rcu(&rdev->list);
3449
mutex_unlock(&bnxt_re_dev_lock);
3450
}
3451
3452
bnxt_re_uninit_resolve_wq(rdev);
3453
bnxt_re_uninit_dcb_wq(rdev);
3454
bnxt_re_uninit_aer_wq(rdev);
3455
3456
bnxt_re_deinitialize_dbr_drop_recov(rdev);
3457
3458
if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx))
3459
(void)bnxt_re_disable_dbr_pacing(rdev);
3460
3461
if (test_and_clear_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags)) {
3462
cancel_delayed_work_sync(&rdev->worker);
3463
}
3464
3465
/* Wait for ULPs to release references */
3466
while (atomic_read(&rdev->stats.rsors.cq_count) && --wait_count)
3467
usleep_range(500, 1000);
3468
if (!wait_count)
3469
dev_err(rdev_to_dev(rdev),
3470
"CQ resources not freed by stack, count = 0x%x",
3471
atomic_read(&rdev->stats.rsors.cq_count));
3472
3473
kdpi = &rdev->dpi_privileged;
3474
if (kdpi->umdbr) { /* kernel DPI was allocated with success */
3475
(void)bnxt_qplib_dealloc_dpi(&rdev->qplib_res, kdpi);
3476
/*
3477
* The driver just needs to know that no command failed
3478
* during the driver load sequence and that the command below is
3479
* indeed required. Piggybacking on the DPI allocation status.
3480
*/
3481
}
3482
3483
/* Protect the device uninitialization and start_irq/stop_irq L2
3484
* callbacks with rtnl lock to avoid race condition between these calls
3485
*/
3486
rtnl_lock();
3487
if (test_and_clear_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags))
3488
bnxt_re_clean_nqs(rdev);
3489
rtnl_unlock();
3490
3491
if (test_and_clear_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags))
3492
bnxt_re_free_tbls(rdev);
3493
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags)) {
3494
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
3495
if (rc)
3496
dev_warn(rdev_to_dev(rdev),
3497
"Failed to deinitialize fw, rc = 0x%x", rc);
3498
}
3499
3500
bnxt_re_put_stats_ctx(rdev);
3501
3502
if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags))
3503
bnxt_qplib_free_hwctx(&rdev->qplib_res);
3504
3505
rtnl_lock();
3506
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags))
3507
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
3508
3509
if (rdev->dbr_pacing)
3510
bnxt_re_deinitialize_dbr_pacing(rdev);
3511
3512
bnxt_re_free_dbr_sw_stats_mem(rdev);
3513
3514
if (test_and_clear_bit(BNXT_RE_FLAG_NET_RING_ALLOC, &rdev->flags))
3515
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id);
3516
3517
if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags))
3518
bnxt_qplib_free_rcfw_channel(&rdev->qplib_res);
3519
3520
if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags))
3521
bnxt_re_free_msix(rdev);
3522
rtnl_unlock();
3523
3524
bnxt_re_destroy_chip_ctx(rdev);
3525
3526
if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) {
3527
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED,
3528
&rdev->flags))
3529
bnxt_re_unregister_netdev(rdev);
3530
}
3531
}
3532
3533
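/* Bring-up sequence for a RoCE device instance.  Most of the hardware
 * setup (MSI-X, RCFW channel, CREQ ring, NQs) runs under the rtnl lock;
 * failures in that region jump to release_rtnl, which drops the lock
 * before unwinding, while later failures use the fail label.  Both
 * paths funnel into bnxt_re_dev_uninit(), which relies on the
 * BNXT_RE_FLAG_* bits set along the way to know what to undo.
 */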
static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type, u8 wqe_mode)
{
	struct bnxt_re_ring_attr rattr = {};
	struct bnxt_qplib_creq_ctx *creq;
	int vec, offset;
	int rc = 0;

	if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
		/* Register a new RoCE device instance with the netdev */
		rc = bnxt_re_register_netdev(rdev);
		if (rc)
			return -EINVAL;
	}
	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);

	rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
	if (rc) {
		dev_err(rdev_to_dev(rdev), "Failed to get chip context rc 0x%x", rc);
		bnxt_re_unregister_netdev(rdev);
		clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
		rc = -EINVAL;
		return rc;
	}

	/* Protect the device initialization and start_irq/stop_irq L2 callbacks
	 * with the rtnl lock to avoid a race condition between these calls.
	 */
	rtnl_lock();
	rc = bnxt_re_request_msix(rdev);
	if (rc) {
		dev_err(rdev_to_dev(rdev),
			"Requesting MSI-X vectors failed with rc = 0x%x", rc);
		rc = -EINVAL;
		goto release_rtnl;
	}
	set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);

	/* Establish the RCFW communication channel to initialize the context
	 * memory for the function and all child VFs.
	 */
	rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res);
	if (rc) {
		dev_err(rdev_to_dev(rdev),
			"Failed to alloc mem for rcfw, rc = %#x\n", rc);
		goto release_rtnl;
	}
	set_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags);

	creq = &rdev->rcfw.creq;
	rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
	rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
	rattr.type = bnxt_re_get_rtype(rdev);
	rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX;
	rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
	rattr.lrid = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
	rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
	if (rc) {
		creq->ring_id = 0xffff;
		dev_err(rdev_to_dev(rdev),
			"Failed to allocate CREQ fw id with rc = 0x%x", rc);
		goto release_rtnl;
	}

	if (!rdev->chip_ctx)
		goto release_rtnl;
	/* Program the NQ ID for DBQ notification */
	if (rdev->chip_ctx->modes.dbr_pacing_v0 ||
	    bnxt_qplib_dbr_pacing_en(rdev->chip_ctx) ||
	    bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
		rc = bnxt_re_initialize_dbr_pacing(rdev);
		if (!rc)
			rdev->dbr_pacing = true;
		else
			rdev->dbr_pacing = false;
		dev_dbg(rdev_to_dev(rdev), "%s: initialize db pacing ret %d\n",
			__func__, rc);
	}

	vec = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].vector;
	offset = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].db_offset;
	rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vec, offset,
					    &bnxt_re_aeq_handler);
	if (rc) {
		dev_err(rdev_to_dev(rdev),
			"Failed to enable RCFW channel with rc = 0x%x", rc);
		goto release_rtnl;
	}
	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);

	rc = bnxt_re_update_dev_attr(rdev);
	if (rc)
		goto release_rtnl;
	bnxt_re_set_resource_limits(rdev);
	if (!rdev->is_virtfn && !_is_chip_gen_p5_p7(rdev->chip_ctx)) {
		rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res);
		if (rc) {
			dev_err(rdev_to_dev(rdev),
				"Failed to alloc hw contexts, rc = 0x%x", rc);
			goto release_rtnl;
		}
		set_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags);
	}

	rc = bnxt_re_get_stats_ctx(rdev);
	if (rc)
		goto release_rtnl;

	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, rdev->is_virtfn);
	if (rc) {
		dev_err(rdev_to_dev(rdev),
			"Failed to initialize fw with rc = 0x%x", rc);
		goto release_rtnl;
	}
	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags);

	/* Base the resource count on the 'new' device caps */
	rc = bnxt_re_update_dev_attr(rdev);
	if (rc)
		goto release_rtnl;
	rc = bnxt_re_alloc_init_tbls(rdev);
	if (rc) {
		dev_err(rdev_to_dev(rdev), "tbls alloc-init failed rc = %#x",
			rc);
		goto release_rtnl;
	}
	rc = bnxt_re_setup_nqs(rdev);
	if (rc) {
		dev_err(rdev_to_dev(rdev), "NQs alloc-init failed rc = %#x\n",
			rc);
		if (rdev->nqr.max_init == 0)
			goto release_rtnl;

		dev_warn(rdev_to_dev(rdev),
			 "expected nqs %d available nqs %d\n",
			 rdev->nqr.num_msix, rdev->nqr.max_init);
	}
	set_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags);
	rtnl_unlock();

	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged,
				  rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
	if (rc)
		goto fail;

	if (rdev->dbr_pacing)
		bnxt_re_enable_dbr_pacing(rdev);

	if (rdev->chip_ctx->modes.dbr_drop_recov)
		bnxt_re_initialize_dbr_drop_recov(rdev);

	rc = bnxt_re_alloc_dbr_sw_stats_mem(rdev);
	if (rc)
		goto fail;

	/* This block of code is needed for error recovery support */
	if (!rdev->is_virtfn) {
		struct bnxt_re_tc_rec *tc_rec;

		tc_rec = &rdev->tc_rec[0];
		rc = bnxt_re_query_hwrm_qportcfg(rdev, tc_rec, 0xFFFF);
		if (rc) {
			dev_err(rdev_to_dev(rdev),
				"Failed to query port config rc:%d", rc);
			return rc;
		}

		/* Query f/w defaults of CC params */
		rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
		if (rc)
			dev_warn(rdev_to_dev(rdev),
				 "Failed to query CC defaults\n");
		if (1) {
			rdev->num_vfs = pci_num_vf(rdev->en_dev->pdev);
			if (rdev->num_vfs) {
				bnxt_re_set_resource_limits(rdev);
				bnxt_qplib_set_func_resources(&rdev->qplib_res);
			}
		}
	}
	INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
	set_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags);
	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000));

	bnxt_re_init_dcb_wq(rdev);
	bnxt_re_init_aer_wq(rdev);
	bnxt_re_init_resolve_wq(rdev);
	mutex_lock(&bnxt_re_dev_lock);
	list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
	/* Added to the list, not in progress anymore */
	gadd_dev_inprogress--;
	set_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED, &rdev->flags);
	mutex_unlock(&bnxt_re_dev_lock);

	return rc;
release_rtnl:
	rtnl_unlock();
fail:
	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);

	return rc;
}

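/* Second stage of device add: register the device with the IB stack,
 * create the sysfs file and announce PORT_ACTIVE/GID_CHANGE so that
 * consumers re-query the port.  On failure the whole device is torn
 * down via bnxt_re_dev_uninit().
 */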
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
{
	int rc = 0;

	rc = bnxt_re_register_ib(rdev);
	if (rc) {
		dev_err(rdev_to_dev(rdev),
			"Register IB failed with rc = 0x%x", rc);
		goto fail;
	}
	if (bnxt_re_sysfs_create_file(rdev)) {
		bnxt_re_stopqps_and_ib_uninit(rdev);
		goto fail;
	}

	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
	set_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags);
	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);

	return rc;
fail:
	bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
	return rc;
}

/* wrapper for ib_init funcs */
int _bnxt_re_ib_init(struct bnxt_re_dev *rdev)
{
	return bnxt_re_ib_init(rdev);
}

/* wrapper for aux init funcs */
int _bnxt_re_ib_init2(struct bnxt_re_dev *rdev)
{
	bnxt_re_ib_init_2(rdev);
	return 0; /* return value kept for future-proofing */
}

static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
{
	bnxt_re_dev_dealloc(rdev);
}

static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct ifnet *netdev,
			   struct bnxt_en_dev *en_dev)
{
	struct ifnet *realdev = NULL;

	realdev = netdev;
	if (realdev)
		dev_dbg(NULL, "%s: realdev = %p netdev = %p\n", __func__,
			realdev, netdev);
	/*
	 * Note:
	 * The first argument to bnxt_re_dev_alloc() is 'netdev' and
	 * not 'realdev', since in the case of bonding we want to
	 * register the bonded virtual netdev (master) to the ib stack.
	 * And 'en_dev' (for L2/PCI communication) is the first slave
	 * device (PF0 on the card).
	 * In the case of a regular netdev, both netdev and the en_dev
	 * correspond to the same device.
	 */
	*rdev = bnxt_re_dev_alloc(netdev, en_dev);
	if (!*rdev) {
		pr_err("%s: netdev %p not handled",
		       ROCE_DRV_MODULE_NAME, netdev);
		return -ENOMEM;
	}
	bnxt_re_hold(*rdev);

	return 0;
}

void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev)
{
	rdev->espeed = rdev->en_dev->espeed;
	return;
}

void bnxt_re_stopqps_and_ib_uninit(struct bnxt_re_dev *rdev)
{
	dev_dbg(rdev_to_dev(rdev), "%s: Stopping QPs, IB uninit on rdev: %p\n",
		__func__, rdev);
	bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
	bnxt_re_ib_uninit(rdev);
}

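/* Remove a RoCE device instance.  A stalled firmware command queue is
 * noted by setting BNXT_RE_FLAG_ERR_DEVICE_DETACHED before the teardown
 * starts.  The rdev pointer in the auxiliary driver data is cleared
 * under the rtnl lock, mirroring how it is published in
 * bnxt_re_add_device(), and the per-device LAG/netdev state bits are
 * only cleared when this is not a pre-recovery removal.
 */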
void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
			   struct auxiliary_device *aux_dev)
{
	struct bnxt_re_en_dev_info *en_info;
	struct bnxt_qplib_cmdq_ctx *cmdq;
	struct bnxt_qplib_rcfw *rcfw;

	rcfw = &rdev->rcfw;
	cmdq = &rcfw->cmdq;
	if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
		set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);

	dev_dbg(rdev_to_dev(rdev), "%s: Removing rdev: %p\n", __func__, rdev);
	bnxt_re_dev_uninit(rdev, op_type);
	en_info = auxiliary_get_drvdata(aux_dev);
	if (en_info) {
		rtnl_lock();
		en_info->rdev = NULL;
		rtnl_unlock();
		if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) {
			clear_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags);
			clear_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags);
			clear_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags);
		}
	}
	bnxt_re_dev_unreg(rdev);
}

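/* On a fresh driver load (num_msix_requested == BNXT_RE_MSIX_FROM_MOD_PARAM)
 * the MSI-X count comes from the max_msix_vec module parameter: each PF
 * probed consumes the next array entry, a single-valued parameter applies
 * to every PF, and once bnxt_re_probe_count reaches BNXT_RE_MAX_DEVICES
 * the driver falls back to its default (0).  As a purely illustrative
 * example, loading with max_msix_vec=8,8,4 would, under these rules, give
 * the first two PFs 8 vectors and the third 4.  Other callers (LAG
 * reconfiguration, error recovery, hot firmware upgrade) pass an explicit
 * count instead.
 */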
int bnxt_re_add_device(struct bnxt_re_dev **rdev,
		       struct ifnet *netdev,
		       u8 qp_mode, u8 op_type, u8 wqe_mode,
		       u32 num_msix_requested,
		       struct auxiliary_device *aux_dev)
{
	struct bnxt_re_en_dev_info *en_info;
	struct bnxt_en_dev *en_dev;
	int rc = 0;

	en_info = auxiliary_get_drvdata(aux_dev);
	en_dev = en_info->en_dev;

	mutex_lock(&bnxt_re_dev_lock);
	/* Check if the driver is already in mod exit and aux_dev is valid */
	if (gmod_exit || !aux_dev) {
		mutex_unlock(&bnxt_re_dev_lock);
		return -ENODEV;
	}
	/* Add device in progress */
	gadd_dev_inprogress++;
	mutex_unlock(&bnxt_re_dev_lock);

	rc = bnxt_re_dev_reg(rdev, netdev, en_dev);
	if (rc) {
		dev_dbg(NULL, "Failed to create add device for netdev %p\n",
			netdev);
		/*
		 * For the BNXT_RE_POST_RECOVERY_INIT special case
		 * called from bnxt_re_start, the work is complete only
		 * after bnxt_re_start completes bnxt_unregister_device
		 * in case of failure.  So bnxt_re_start will decrement
		 * gadd_dev_inprogress in case of failure.
		 */
		if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
			mutex_lock(&bnxt_re_dev_lock);
			gadd_dev_inprogress--;
			mutex_unlock(&bnxt_re_dev_lock);
		}
		return rc;
	}

	if (rc != 0)
		goto ref_error;

	/*
	 * num_msix_requested = BNXT_RE_MSIX_FROM_MOD_PARAM indicates a fresh driver load.
	 * Otherwise, this invocation can be the result of lag create / destroy,
	 * error recovery, hot fw upgrade, etc.
	 */
	if (num_msix_requested == BNXT_RE_MSIX_FROM_MOD_PARAM) {
		if (bnxt_re_probe_count < BNXT_RE_MAX_DEVICES)
			num_msix_requested = max_msix_vec[bnxt_re_probe_count++];
		else
			/* Consider as default when probe_count exceeds its limit */
			num_msix_requested = 0;

		/* if the user specifies only one value, use the same for all PFs */
		if (max_msix_vec_argc == 1)
			num_msix_requested = max_msix_vec[0];
	}

	(*rdev)->num_msix_requested = num_msix_requested;
	(*rdev)->gsi_ctx.gsi_qp_mode = qp_mode;
	(*rdev)->adev = aux_dev;
	(*rdev)->dev_addr = en_dev->softc->func.mac_addr;
	/* Before updating the rdev pointer in the bnxt_re_en_dev_info structure,
	 * take the rtnl lock to avoid accessing an invalid rdev pointer from
	 * L2 ULP callbacks.  This is applicable in all the places where the rdev
	 * pointer is updated in bnxt_re_en_dev_info.
	 */
	rtnl_lock();
	en_info->rdev = *rdev;
	rtnl_unlock();
	rc = bnxt_re_dev_init(*rdev, op_type, wqe_mode);
	if (rc) {
ref_error:
		bnxt_re_dev_unreg(*rdev);
		*rdev = NULL;
		/*
		 * For the BNXT_RE_POST_RECOVERY_INIT special case
		 * called from bnxt_re_start, the work is complete only
		 * after bnxt_re_start completes bnxt_unregister_device
		 * in case of failure.  So bnxt_re_start will decrement
		 * gadd_dev_inprogress in case of failure.
		 */
		if (op_type != BNXT_RE_POST_RECOVERY_INIT) {
			mutex_lock(&bnxt_re_dev_lock);
			gadd_dev_inprogress--;
			mutex_unlock(&bnxt_re_dev_lock);
		}
	}
	dev_dbg(rdev_to_dev(*rdev), "%s: Adding rdev: %p\n", __func__, *rdev);
	if (!rc) {
		set_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags);
	}
	return rc;
}

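/* Find another registered PF on the same PCI bus and slot but a
 * different function, typically the other port of the same adapter.
 * Illustratively, for a device at 0000:af:00.0 this would return the
 * rdev of 0000:af:00.1 if that PF is also bound to bnxt_re.  The global
 * device list is walked under the RCU read lock.
 */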
struct bnxt_re_dev *bnxt_re_get_peer_pf(struct bnxt_re_dev *rdev)
{
	struct pci_dev *pdev_in = rdev->en_dev->pdev;
	int tmp_bus_num, bus_num = pdev_in->bus->number;
	int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn);
	int tmp_func_num, func_num = PCI_FUNC(pdev_in->devfn);
	struct bnxt_re_dev *tmp_rdev;

	rcu_read_lock();
	list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) {
		tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number;
		tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn);
		tmp_func_num = PCI_FUNC(tmp_rdev->en_dev->pdev->devfn);

		if (bus_num == tmp_bus_num && dev_num == tmp_dev_num &&
		    func_num != tmp_func_num) {
			rcu_read_unlock();
			return tmp_rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}

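/* Defer netdev-event handling to the bnxt_re workqueue.  The allocated
 * bnxt_re_work item carries the event and associated pointers, and
 * rdev->sched_count is bumped here and dropped again in bnxt_re_task()
 * so that in-flight work can be detected (see the NETDEV_UNREGISTER
 * handling in bnxt_re_netdev_event()).
 */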
int bnxt_re_schedule_work(struct bnxt_re_dev *rdev, unsigned long event,
			  struct ifnet *vlan_dev,
			  struct ifnet *netdev,
			  struct auxiliary_device *adev)
{
	struct bnxt_re_work *re_work;

	/* Allocate for the deferred task */
	re_work = kzalloc(sizeof(*re_work), GFP_KERNEL);
	if (!re_work)
		return -ENOMEM;

	re_work->rdev = rdev;
	re_work->event = event;
	re_work->vlan_dev = vlan_dev;
	re_work->adev = adev;
	INIT_WORK(&re_work->work, bnxt_re_task);
	if (rdev)
		atomic_inc(&rdev->sched_count);
	re_work->netdev = netdev;
	queue_work(bnxt_re_wq, &re_work->work);

	return 0;
}

int bnxt_re_get_slot_pf_count(struct bnxt_re_dev *rdev)
{
	struct pci_dev *pdev_in = rdev->en_dev->pdev;
	int tmp_bus_num, bus_num = pdev_in->bus->number;
	int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn);
	struct bnxt_re_dev *tmp_rdev;
	int pf_cnt = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) {
		tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number;
		tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn);

		if (bus_num == tmp_bus_num && dev_num == tmp_dev_num)
			pf_cnt++;
	}
	rcu_read_unlock();
	return pf_cnt;
}

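/* Worker for the events queued by bnxt_re_schedule_work().  Under
 * bnxt_re_mutex it revalidates the rdev and then dispatches:
 * NETDEV_UP re-issues PORT_ACTIVE and registers for async events,
 * NETDEV_DOWN stops the data QPs and reports PORT_ERR, NETDEV_CHANGE
 * re-evaluates the link state and the CC settings, and NETDEV_UNREGISTER
 * triggers a full device removal.
 */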
/* Handle all deferred netdev event tasks */
static void bnxt_re_task(struct work_struct *work)
{
	struct bnxt_re_en_dev_info *en_info;
	struct auxiliary_device *aux_dev;
	struct bnxt_re_work *re_work;
	struct bnxt_re_dev *rdev;

	re_work = container_of(work, struct bnxt_re_work, work);

	mutex_lock(&bnxt_re_mutex);
	rdev = re_work->rdev;

	/*
	 * If the previous rdev was deleted due to bond creation,
	 * do not handle the event.
	 */
	if (!bnxt_re_is_rdev_valid(rdev))
		goto exit;

	/* Ignore the event if the device is not registered with the IB stack.
	 * This is to avoid handling any event while the device is added/removed.
	 */
	if (rdev && !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
		dev_dbg(rdev_to_dev(rdev), "%s: Ignoring netdev event 0x%lx",
			__func__, re_work->event);
		goto done;
	}

	/* Extra check to silence coverity. We shouldn't handle any event
	 * when rdev is NULL.
	 */
	if (!rdev)
		goto exit;

	dev_dbg(rdev_to_dev(rdev), "Scheduled work for event 0x%lx",
		re_work->event);

	switch (re_work->event) {
	case NETDEV_UP:
		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
				       IB_EVENT_PORT_ACTIVE);
		bnxt_re_net_register_async_event(rdev);
		break;

	case NETDEV_DOWN:
		bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0);
		bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
		bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
				       IB_EVENT_PORT_ERR);
		break;

	case NETDEV_CHANGE:
		if (bnxt_re_get_link_state(rdev) == IB_PORT_DOWN) {
			bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev);
			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
					       IB_EVENT_PORT_ERR);
			break;
		} else if (bnxt_re_get_link_state(rdev) == IB_PORT_ACTIVE) {
			bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
					       IB_EVENT_PORT_ACTIVE);
		}

		/* temporarily disable the check for SR2 */
		if (!bnxt_qplib_query_cc_param(&rdev->qplib_res,
					       &rdev->cc_param) &&
		    !_is_chip_p7(rdev->chip_ctx)) {
			/*
			 * Disable CC for 10G speed
			 * for non p5 devices
			 */
			if (rdev->sl_espeed == SPEED_10000 &&
			    !_is_chip_gen_p5_p7(rdev->chip_ctx)) {
				if (rdev->cc_param.enable)
					bnxt_re_clear_cc(rdev);
			} else {
				if (!rdev->cc_param.enable &&
				    rdev->cc_param.admin_enable)
					bnxt_re_setup_cc(rdev);
			}
		}
		break;

	case NETDEV_UNREGISTER:
		bnxt_re_stopqps_and_ib_uninit(rdev);
		aux_dev = rdev->adev;
		if (re_work->adev)
			goto done;

		bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, aux_dev);

		break;

	default:
		break;
	}
done:
	if (rdev) {
		/* memory barrier to guarantee task completion
		 * before decrementing sched count
		 */
		mmiowb();
		atomic_dec(&rdev->sched_count);
	}
exit:
	if (re_work->adev && re_work->event == NETDEV_UNREGISTER) {
		en_info = auxiliary_get_drvdata(re_work->adev);
		en_info->ib_uninit_done = true;
		wake_up(&en_info->waitq);
	}
	kfree(re_work);
	mutex_unlock(&bnxt_re_mutex);
}

/*
 * "Notifier chain callback can be invoked for the same chain from
 * different CPUs at the same time".
 *
 * For cases when the netdev is already present, our call to the
 * register_netdevice_notifier() will actually get the rtnl_lock()
 * before sending NETDEV_REGISTER and (if up) NETDEV_UP
 * events.
 *
 * But for cases when the netdev is not already present, the notifier
 * chain can be invoked from different CPUs simultaneously.
 *
 * This is protected by the netdev_mutex.
 */
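/* For events that arrive on a VLAN interface, real_dev resolves to the
 * underlying port and only NETDEV_UP is forwarded to the worker (with
 * the VLAN ifnet recorded as vlan_dev); events on the port itself are
 * handled in the second switch below.
 */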
static int bnxt_re_netdev_event(struct notifier_block *notifier,
				unsigned long event, void *ptr)
{
	struct ifnet *real_dev, *netdev;
	struct bnxt_re_dev *rdev = NULL;

	netdev = netdev_notifier_info_to_ifp(ptr);
	real_dev = rdma_vlan_dev_real_dev(netdev);
	if (!real_dev)
		real_dev = netdev;
	/* In case of bonding, this will be the bond's rdev */
	rdev = bnxt_re_from_netdev(real_dev);

	if (!rdev)
		goto exit;

	dev_info(rdev_to_dev(rdev), "%s: Event = %s (0x%lx), rdev %s (real_dev %s)\n",
		 __func__, bnxt_re_netevent(event), event,
		 rdev ? rdev->netdev ? if_getdname(rdev->netdev) : "->netdev = NULL" : "= NULL",
		 (real_dev == netdev) ? "= netdev" : if_getdname(real_dev));

	if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
		goto exit;

	bnxt_re_hold(rdev);

	if (real_dev != netdev) {
		switch (event) {
		case NETDEV_UP:
			bnxt_re_schedule_work(rdev, event, netdev,
					      NULL, NULL);
			break;
		case NETDEV_DOWN:
			break;
		default:
			break;
		}
		goto done;
	}

	switch (event) {
	case NETDEV_CHANGEADDR:
		if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
			bnxt_re_update_shadow_ah(rdev);
		bnxt_qplib_get_guid(rdev->dev_addr,
				    (u8 *)&rdev->ibdev.node_guid);
		break;

	case NETDEV_CHANGE:
		bnxt_re_get_link_speed(rdev);
		bnxt_re_schedule_work(rdev, event, NULL, NULL, NULL);
		break;
	case NETDEV_UNREGISTER:
		/* netdev notifier will call NETDEV_UNREGISTER again later since
		 * we are still holding the reference to the netdev
		 */

		/*
		 * Workaround to avoid an ib_unregister hang.  Check for the
		 * module reference and don't free up the device if the
		 * reference is nonzero.  Checking only for PF functions.
		 */

		if (rdev) {
			dev_info(rdev_to_dev(rdev),
				 "bnxt_re:Unreg recvd when module refcnt > 0");
			dev_info(rdev_to_dev(rdev),
				 "bnxt_re:Close all apps using bnxt_re devs");
			dev_info(rdev_to_dev(rdev),
				 "bnxt_re:Remove the configfs entry created for the device");
			dev_info(rdev_to_dev(rdev),
				 "bnxt_re:Refer documentation for details");
			goto done;
		}

		if (atomic_read(&rdev->sched_count) > 0)
			goto done;
		if (!rdev->unreg_sched) {
			bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER,
					      NULL, NULL, NULL);
			rdev->unreg_sched = true;
			goto done;
		}

		break;
	default:
		break;
	}
done:
	if (rdev)
		bnxt_re_put(rdev);
exit:
	return NOTIFY_DONE;
}

static struct notifier_block bnxt_re_netdev_notifier = {
	.notifier_call = bnxt_re_netdev_event
};

static void bnxt_re_remove_base_interface(struct bnxt_re_dev *rdev,
					  struct auxiliary_device *adev)
{
	bnxt_re_stopqps_and_ib_uninit(rdev);
	bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
	auxiliary_set_drvdata(adev, NULL);
}

/*
 * bnxt_re_remove - Removes the RoCE aux device
 * @adev - aux device pointer
 *
 * This function removes the RoCE device.  It gets
 * called in the mod exit path and the pci unbind path.
 * If the rdev is a bond interface, it destroys the lag
 * in the module exit path; in the pci unbind case it
 * destroys the lag and recreates the other base interface.
 * If the device was already removed in the error recovery
 * path, it just unregisters from the L2 driver.
 */
static void bnxt_re_remove(struct auxiliary_device *adev)
{
	struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
	struct bnxt_en_dev *en_dev;
	struct bnxt_re_dev *rdev;
	bool primary_dev = false;
	bool secondary_dev = false;

	if (!en_info)
		return;

	mutex_lock(&bnxt_re_mutex);
	en_dev = en_info->en_dev;

	rdev = en_info->rdev;

	if (rdev && bnxt_re_is_rdev_valid(rdev)) {
		if (pci_channel_offline(rdev->rcfw.pdev))
			set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);

		if (test_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags))
			primary_dev = true;
		if (test_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags))
			secondary_dev = true;

		/*
		 * en_dev_info of the primary device and the secondary device
		 * hold the same rdev pointer when LAG is configured.  This
		 * rdev pointer is the rdev of the bond interface.
		 */
		if (!primary_dev && !secondary_dev) {
			/* removal of a non-bond interface */
			bnxt_re_remove_base_interface(rdev, adev);
		} else {
			/*
			 * removal of a bond primary/secondary interface.  In
			 * this case the bond device is already removed, so
			 * rdev->binfo is NULL.
			 */
			auxiliary_set_drvdata(adev, NULL);
		}
	} else {
		/* device is removed from ulp stop, unregister the net dev */
		if (test_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags)) {
			rtnl_lock();
			en_dev->en_ops->bnxt_unregister_device(en_dev,
							       BNXT_ROCE_ULP);
			rtnl_unlock();
		}
	}
	mutex_unlock(&bnxt_re_mutex);
	return;
}

/* wrapper for all external user context callers */
void _bnxt_re_remove(struct auxiliary_device *adev)
{
	bnxt_re_remove(adev);
}

static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev)
{
	int rc;

	rc = bnxt_re_get_device_stats(rdev);
	if (rc)
		dev_err(rdev_to_dev(rdev),
			"Failed initial device stat query");

	bnxt_re_net_register_async_event(rdev);
}

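/* Auxiliary-bus probe entry point.  After checking that the Ethernet
 * driver exposes a compatible ULP version, it allocates the per-device
 * en_info, attaches it to the auxiliary device and then performs the
 * full add + IB-registration sequence with the default GSI mode, static
 * WQE mode and the module-parameter based MSI-X selection.
 */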
static int bnxt_re_probe(struct auxiliary_device *adev,
			 const struct auxiliary_device_id *id)
{
	struct bnxt_aux_dev *aux_dev =
		container_of(adev, struct bnxt_aux_dev, aux_dev);
	struct bnxt_re_en_dev_info *en_info;
	struct bnxt_en_dev *en_dev = NULL;
	struct bnxt_re_dev *rdev;
	int rc = -ENODEV;

	if (aux_dev)
		en_dev = aux_dev->edev;

	if (!en_dev)
		return rc;

	if (en_dev->ulp_version != BNXT_ULP_VERSION) {
		pr_err("%s: probe error: bnxt_en ulp version magic %x is not compatible!\n",
		       ROCE_DRV_MODULE_NAME, en_dev->ulp_version);
		return -EINVAL;
	}

	en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
	if (!en_info)
		return -ENOMEM;
	memset(en_info, 0, sizeof(struct bnxt_re_en_dev_info));
	en_info->en_dev = en_dev;
	auxiliary_set_drvdata(adev, en_info);

	mutex_lock(&bnxt_re_mutex);
	rc = bnxt_re_add_device(&rdev, en_dev->net,
				BNXT_RE_GSI_MODE_ALL,
				BNXT_RE_COMPLETE_INIT,
				BNXT_QPLIB_WQE_MODE_STATIC,
				BNXT_RE_MSIX_FROM_MOD_PARAM, adev);
	if (rc) {
		mutex_unlock(&bnxt_re_mutex);
		return rc;
	}

	rc = bnxt_re_ib_init(rdev);
	if (rc)
		goto err;

	bnxt_re_ib_init_2(rdev);

	dev_dbg(rdev_to_dev(rdev), "%s: adev: %p\n", __func__, adev);
	rdev->adev = adev;

	mutex_unlock(&bnxt_re_mutex);

	return 0;

err:
	mutex_unlock(&bnxt_re_mutex);
	bnxt_re_remove(adev);

	return rc;
}

static const struct auxiliary_device_id bnxt_re_id_table[] = {
	{ .name = BNXT_ADEV_NAME ".rdma", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);

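/* The id table above matches the BNXT_ADEV_NAME ".rdma" auxiliary
 * device, which the if_bnxt Ethernet driver is expected to create for
 * RoCE-capable functions; matching it lets the auxiliary bus probe this
 * driver automatically.
 */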
static struct auxiliary_driver bnxt_re_driver = {
	.name = "rdma",
	.probe = bnxt_re_probe,
	.remove = bnxt_re_remove,
	.id_table = bnxt_re_id_table,
};

static int __init bnxt_re_mod_init(void)
{
	int rc = 0;

	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, drv_version);

	bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
	if (!bnxt_re_wq)
		return -ENOMEM;

	rc = bnxt_re_register_netdevice_notifier(&bnxt_re_netdev_notifier);
	if (rc) {
		pr_err("%s: Cannot register to netdevice_notifier",
		       ROCE_DRV_MODULE_NAME);
		goto err_netdev;
	}

	INIT_LIST_HEAD(&bnxt_re_dev_list);

	rc = auxiliary_driver_register(&bnxt_re_driver);
	if (rc) {
		pr_err("%s: Failed to register auxiliary driver\n",
		       ROCE_DRV_MODULE_NAME);
		goto err_auxdrv;
	}

	return 0;

err_auxdrv:
	bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);

err_netdev:
	destroy_workqueue(bnxt_re_wq);

	return rc;
}

static void __exit bnxt_re_mod_exit(void)
{
	gmod_exit = 1;
	auxiliary_driver_unregister(&bnxt_re_driver);

	bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);

	if (bnxt_re_wq)
		destroy_workqueue(bnxt_re_wq);
}

module_init(bnxt_re_mod_init);
module_exit(bnxt_re_mod_exit);