GitHub Repository: awilliam/linux-vfio
Path: blob/master/drivers/dma/ioat/dma_v3.c
/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * BSD LICENSE
 *
 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Support routines for v3+ hardware
 */

#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "registers.h"
#include "hw.h"
#include "dma.h"
#include "dma_v2.h"

/* ioat hardware assumes at least two sources for raid operations */
#define src_cnt_to_sw(x) ((x) + 2)
#define src_cnt_to_hw(x) ((x) - 2)
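/*
 * The hardware descriptor stores the source count biased by two, since
 * every raid operation has at least two sources: a five-source xor is
 * programmed with ctl_f.src_cnt = src_cnt_to_hw(5) = 3, and the cleanup
 * path recovers 5 via src_cnt_to_sw(3).
 */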

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc __read_mostly = 0xe0;
static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc __read_mostly = 0xf8;
static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
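/*
 * Bit idx of the *_idx_to_desc mask selects which descriptor holds source
 * idx (0 = base, 1 = extended), and *_idx_to_field[idx] is the raw 64-bit
 * field within that descriptor.  For example, xor source 6 has bit 6 set
 * in 0xe0, so it lands in raw field xor_idx_to_field[6] == 1 of the
 * extended descriptor, while xor source 2 (bit clear) lands in raw field
 * 5 of the base descriptor.
 */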

static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	return raw->field[xor_idx_to_field[idx]];
}

static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}
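/*
 * Note that the pq coefficients always live in the base pq descriptor
 * (descs[0]), even when the source address itself lands in the extended
 * descriptor.
 */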

static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
			    struct ioat_ring_ent *desc, int idx)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct pci_dev *pdev = chan->device->pdev;
	size_t len = desc->len;
	size_t offset = len - desc->hw->size;
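	/* desc is the last descriptor of the operation, so its addresses
	 * were advanced by 'offset' (total length minus the final transfer
	 * size); subtracting it below recovers the base dma address that
	 * was originally mapped.
	 */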
	struct dma_async_tx_descriptor *tx = &desc->txd;
	enum dma_ctrl_flags flags = tx->flags;

	switch (desc->hw->ctl_f.op) {
	case IOAT_OP_COPY:
		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
			ioat_dma_unmap(chan, flags, len, desc->hw);
		break;
	case IOAT_OP_FILL: {
		struct ioat_fill_descriptor *hw = desc->fill;

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
			ioat_unmap(pdev, hw->dst_addr - offset, len,
				   PCI_DMA_FROMDEVICE, flags, 1);
		break;
	}
	case IOAT_OP_XOR_VAL:
	case IOAT_OP_XOR: {
		struct ioat_xor_descriptor *xor = desc->xor;
		struct ioat_ring_ent *ext;
		struct ioat_xor_ext_descriptor *xor_ex = NULL;
		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
		struct ioat_raw_descriptor *descs[2];
		int i;

		if (src_cnt > 5) {
			ext = ioat2_get_ring_ent(ioat, idx + 1);
			xor_ex = ext->xor_ex;
		}

		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
			descs[0] = (struct ioat_raw_descriptor *) xor;
			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
			for (i = 0; i < src_cnt; i++) {
				dma_addr_t src = xor_get_src(descs, i);

				ioat_unmap(pdev, src - offset, len,
					   PCI_DMA_TODEVICE, flags, 0);
			}

			/* dest is a source in xor validate operations */
			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
				ioat_unmap(pdev, xor->dst_addr - offset, len,
					   PCI_DMA_TODEVICE, flags, 1);
				break;
			}
		}

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
			ioat_unmap(pdev, xor->dst_addr - offset, len,
				   PCI_DMA_FROMDEVICE, flags, 1);
		break;
	}
	case IOAT_OP_PQ_VAL:
	case IOAT_OP_PQ: {
		struct ioat_pq_descriptor *pq = desc->pq;
		struct ioat_ring_ent *ext;
		struct ioat_pq_ext_descriptor *pq_ex = NULL;
		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
		struct ioat_raw_descriptor *descs[2];
		int i;

		if (src_cnt > 3) {
			ext = ioat2_get_ring_ent(ioat, idx + 1);
			pq_ex = ext->pq_ex;
		}

		/* in the 'continue' case don't unmap the dests as sources */
		if (dmaf_p_disabled_continue(flags))
			src_cnt--;
		else if (dmaf_continue(flags))
			src_cnt -= 3;

		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
			descs[0] = (struct ioat_raw_descriptor *) pq;
			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
			for (i = 0; i < src_cnt; i++) {
				dma_addr_t src = pq_get_src(descs, i);

				ioat_unmap(pdev, src - offset, len,
					   PCI_DMA_TODEVICE, flags, 0);
			}

			/* the dests are sources in pq validate operations */
			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
				if (!(flags & DMA_PREP_PQ_DISABLE_P))
					ioat_unmap(pdev, pq->p_addr - offset,
						   len, PCI_DMA_TODEVICE, flags, 0);
				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
					ioat_unmap(pdev, pq->q_addr - offset,
						   len, PCI_DMA_TODEVICE, flags, 0);
				break;
			}
		}

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
			if (!(flags & DMA_PREP_PQ_DISABLE_P))
				ioat_unmap(pdev, pq->p_addr - offset, len,
					   PCI_DMA_BIDIRECTIONAL, flags, 1);
			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
				ioat_unmap(pdev, pq->q_addr - offset, len,
					   PCI_DMA_BIDIRECTIONAL, flags, 1);
		}
		break;
	}
	default:
		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
			__func__, desc->hw->ctl_f.op);
	}
}

static bool desc_has_ext(struct ioat_ring_ent *desc)
{
	struct ioat_dma_descriptor *hw = desc->hw;

	if (hw->ctl_f.op == IOAT_OP_XOR ||
	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
		struct ioat_xor_descriptor *xor = desc->xor;

		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
			return true;
	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
		struct ioat_pq_descriptor *pq = desc->pq;

		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
			return true;
	}

	return false;
}
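/*
 * A base xor descriptor holds up to five source addresses and a base pq
 * descriptor up to three; operations with more sources spill into an
 * extended descriptor occupying the next ring slot, which is why the
 * cleanup path skips one extra entry for such descriptors.
 */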

/**
 * __cleanup - reclaim used descriptors
 * @ioat: channel (ring) to clean
 *
 * The difference from the dma_v2.c __cleanup() is that this routine
 * handles extended descriptors and dma-unmapping raid operations.
 */
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	int idx = ioat->tail, i;
	u16 active;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		struct dma_async_tx_descriptor *tx;

		smp_read_barrier_depends();
		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
		desc = ioat2_get_ring_ent(ioat, idx + i);
		dump_desc_dbg(ioat, desc);
		tx = &desc->txd;
		if (tx->cookie) {
			chan->completed_cookie = tx->cookie;
			ioat3_dma_unmap(ioat, desc, idx + i);
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}

		if (tx->phys == phys_complete)
			seen_current = true;

		/* skip extended descriptors */
		if (desc_has_ext(desc)) {
			BUG_ON(i + 1 >= active);
			i++;
		}
	}
	smp_mb(); /* finish all descriptor reads before incrementing tail */
	ioat->tail = idx + i;
	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
	chan->last_completion = phys_complete;

	if (active - i == 0) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
	/* 5 microsecond delay per pending descriptor */
	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
}

static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	spin_lock_bh(&chan->cleanup_lock);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);
	spin_unlock_bh(&chan->cleanup_lock);
}

static void ioat3_cleanup_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);

	ioat3_cleanup(ioat);
	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}

static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	ioat2_quiesce(chan, 0);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);

	__ioat2_restart_chan(ioat);
}

static void ioat3_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
	struct ioat_chan_common *chan = &ioat->base;

	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		status = ioat_chansts(chan);

		/* when halted due to errors check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
				__func__, chanerr);
			if (test_bit(IOAT_RUN, &chan->state))
				BUG_ON(is_ioat_bug(chanerr));
			else /* we never got off the ground */
				return;
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		spin_lock_bh(&chan->cleanup_lock);
		if (ioat_cleanup_preamble(chan, &phys_complete))
			__cleanup(ioat, phys_complete);
		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
			spin_lock_bh(&ioat->prep_lock);
			ioat3_restart_channel(ioat);
			spin_unlock_bh(&ioat->prep_lock);
		} else {
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&chan->cleanup_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		spin_lock_bh(&chan->cleanup_lock);
		spin_lock_bh(&ioat->prep_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order - 1);
		spin_unlock_bh(&ioat->prep_lock);
		spin_unlock_bh(&chan->cleanup_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}

static enum dma_status
ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
		struct dma_tx_state *txstate)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);

	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
		return DMA_SUCCESS;

	ioat3_cleanup(ioat);

	return ioat_tx_status(c, cookie, txstate);
}

static struct dma_async_tx_descriptor *
ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
		       size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_fill_descriptor *fill;
	u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
	int num_descs, idx, i;

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		fill = desc->fill;

		fill->size = xfer_size;
		fill->src_data = src_data;
		fill->dst_addr = dest;
		fill->ctl = 0;
		fill->ctl_f.op = IOAT_OP_FILL;

		len -= xfer_size;
		dest += xfer_size;
		dump_desc_dbg(ioat, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	fill->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}
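/*
 * The fill pattern above is built by replicating the low byte of 'value'
 * across all eight bytes of src_data (0x0101010101010101ULL * byte), so a
 * memset of 0xab programs the engine with 0xabababababababab.
 */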

static struct dma_async_tx_descriptor *
__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		      size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
		int s;

		desc = ioat2_get_ring_ent(ioat, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor xor_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}
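/*
 * The transaction returned to the caller is the trailing null
 * descriptor's, so the cookie a client waits on presumably completes only
 * after its ordered completion write, and therefore after the xor
 * operation itself, has finished.
 */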

static struct dma_async_tx_descriptor *
ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	       unsigned int src_cnt, size_t len, unsigned long flags)
{
	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		   unsigned int src_cnt, size_t len,
		   enum sum_check_flags *result, unsigned long flags)
{
	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
				     src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(&ioat->base);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

static struct dma_async_tx_descriptor *
__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		     const dma_addr_t *dst, const dma_addr_t *src,
		     unsigned int src_cnt, const unsigned char *scf,
		     size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;

	dev_dbg(to_dev(chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case.
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat2_check_space_lock(ioat, num_descs+1) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor pq_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat, desc, ext);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

static struct dma_async_tx_descriptor *
ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	      unsigned int src_cnt, const unsigned char *scf, size_t len,
	      unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);
	} else
		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
					    len, flags);
}
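/*
 * In the single source multiply case the hardware still needs two
 * sources, so the one real source is submitted twice; the duplicate is
 * given a coefficient of zero so that it should not contribute to the
 * computed Q.
 */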

struct dma_async_tx_descriptor *
ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		  unsigned int src_cnt, const unsigned char *scf, size_t len,
		  enum sum_check_flags *pqres, unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

static struct dma_async_tx_descriptor *
ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		 unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		     unsigned int src_cnt, size_t len,
		     enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
				    len, flags);
}
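/*
 * When the device exposes a pq engine but no xor engine (see
 * ioat3_dma_probe), plain xor is emulated through the pq path: Q is
 * disabled, the coefficient array is zeroed (it only affects Q), and the
 * unweighted P result serves as the xor of the sources.
 */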

static struct dma_async_tx_descriptor *
ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat2_check_space_lock(ioat, 1) == 0)
		desc = ioat2_get_ring_ent(ioat, ioat->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

static void __devinit ioat3_dma_test_callback(void *dma_async_param)
{
	struct completion *cmp = dma_async_param;

	complete(cmp);
}

#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
{
	int i, src_idx;
	struct page *dest;
	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dma_addr, dest_dma;
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	u8 cmp_byte = 0;
	u32 cmp_word;
	u32 xor_val_result;
	int err = 0;
	struct completion cmp;
	unsigned long tmo;
	struct device *dev = &device->pdev->dev;
	struct dma_device *dma = &device->common;

	dev_dbg(dev, "%s\n", __func__);

	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
		return 0;

	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
		if (!xor_srcs[src_idx]) {
			while (src_idx--)
				__free_page(xor_srcs[src_idx]);
			return -ENOMEM;
		}
	}

	dest = alloc_page(GFP_KERNEL);
	if (!dest) {
		while (src_idx--)
			__free_page(xor_srcs[src_idx]);
		return -ENOMEM;
	}

	/* Fill in src buffers */
	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		u8 *ptr = page_address(xor_srcs[src_idx]);
		for (i = 0; i < PAGE_SIZE; i++)
			ptr[i] = (1 << src_idx);
	}

	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
		cmp_byte ^= (u8) (1 << src_idx);

	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
			(cmp_byte << 8) | cmp_byte;

	memset(page_address(dest), 0, PAGE_SIZE);

	dma_chan = container_of(dma->channels.next, struct dma_chan,
				device_node);
	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	/* test xor */
	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
				      DMA_PREP_INTERRUPT);

	if (!tx) {
		dev_err(dev, "Self-test xor prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test xor setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test xor timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i] != cmp_word) {
			dev_err(dev, "Self-test xor failed compare\n");
			err = -ENODEV;
			goto free_resources;
		}
	}
	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);

	/* skip validate if the capability is not present */
	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
		goto free_resources;

	/* validate the sources with the destination page */
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		xor_val_srcs[i] = xor_srcs[i];
	xor_val_srcs[i] = dest;

	xor_val_result = 1;

	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test zero prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test zero setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test validate timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	if (xor_val_result != 0) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto free_resources;
	}

	/* skip memset if the capability is not present */
	if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
		goto free_resources;

	/* test memset */
	dma_addr = dma_map_page(dev, dest, 0,
			PAGE_SIZE, DMA_FROM_DEVICE);
	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
					 DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test memset prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test memset setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test memset timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i]) {
			dev_err(dev, "Self-test memset failed compare\n");
			err = -ENODEV;
			goto free_resources;
		}
	}

	/* test for non-zero parity sum */
	xor_val_result = 0;
	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test 2nd zero prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test 2nd zero setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test 2nd validate timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	if (xor_val_result != SUM_CHECK_P_RESULT) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	dma->device_free_chan_resources(dma_chan);
out:
	src_idx = IOAT_NUM_SRC_TEST;
	while (src_idx--)
		__free_page(xor_srcs[src_idx]);
	__free_page(dest);
	return err;
}

static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
{
	int rc = ioat_dma_self_test(device);

	if (rc)
		return rc;

	rc = ioat_xor_val_self_test(device);
	if (rc)
		return rc;

	return 0;
}

static int ioat3_reset_hw(struct ioat_chan_common *chan)
{
	/* throw away whatever the channel was doing and get it
	 * initialized, with ioat3 specific workarounds
	 */
	struct ioatdma_device *device = chan->device;
	struct pci_dev *pdev = device->pdev;
	u32 chanerr;
	u16 dev_id;
	int err;

	ioat2_quiesce(chan, msecs_to_jiffies(100));

	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);

	/* -= IOAT ver.3 workarounds =- */
	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
	 * that can cause stability issues for IOAT ver.3, and clear any
	 * pending errors
	 */
	pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
	err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
	if (err) {
		dev_err(&pdev->dev, "channel error register unreachable\n");
		return err;
	}
	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);

	/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
	 * (workaround for spurious config parity error after restart)
	 */
	pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
	if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
		pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);

	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
}

int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
{
	struct pci_dev *pdev = device->pdev;
	int dca_en = system_has_dca_enabled(pdev);
	struct dma_device *dma;
	struct dma_chan *c;
	struct ioat_chan_common *chan;
	bool is_raid_device = false;
	int err;
	u32 cap;

	device->enumerate_channels = ioat2_enumerate_channels;
	device->reset_hw = ioat3_reset_hw;
	device->self_test = ioat3_dma_self_test;
	dma = &device->common;
	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
	dma->device_issue_pending = ioat2_issue_pending;
	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
	dma->device_free_chan_resources = ioat2_free_chan_resources;

	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;

	cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);

	/* dca is incompatible with raid operations */
	if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
		cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);

	if (cap & IOAT_CAP_XOR) {
		is_raid_device = true;
		dma->max_xor = 8;
		dma->xor_align = 6;

		dma_cap_set(DMA_XOR, dma->cap_mask);
		dma->device_prep_dma_xor = ioat3_prep_xor;

		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
	}
	if (cap & IOAT_CAP_PQ) {
		is_raid_device = true;
		dma_set_maxpq(dma, 8, 0);
		dma->pq_align = 6;

		dma_cap_set(DMA_PQ, dma->cap_mask);
		dma->device_prep_dma_pq = ioat3_prep_pq;

		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;

		if (!(cap & IOAT_CAP_XOR)) {
			dma->max_xor = 8;
			dma->xor_align = 6;

			dma_cap_set(DMA_XOR, dma->cap_mask);
			dma->device_prep_dma_xor = ioat3_prep_pqxor;

			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
		}
	}
	if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
		dma_cap_set(DMA_MEMSET, dma->cap_mask);
		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
	}

	if (is_raid_device) {
		dma->device_tx_status = ioat3_tx_status;
		device->cleanup_fn = ioat3_cleanup_event;
		device->timer_fn = ioat3_timer_event;
	} else {
		dma->device_tx_status = ioat_dma_tx_status;
		device->cleanup_fn = ioat2_cleanup_event;
		device->timer_fn = ioat2_timer_event;
	}

#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
	dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
	dma->device_prep_dma_pq_val = NULL;
#endif

#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
	dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
	dma->device_prep_dma_xor_val = NULL;
#endif

	err = ioat_probe(device);
	if (err)
		return err;
	ioat_set_tcp_copy_break(262144);

	list_for_each_entry(c, &dma->channels, device_node) {
		chan = to_chan_common(c);
		writel(IOAT_DMA_DCA_ANY_CPU,
		       chan->reg_base + IOAT_DCACTRL_OFFSET);
	}

	err = ioat_register(device);
	if (err)
		return err;

	ioat_kobject_add(device, &ioat2_ktype);

	if (dca)
		device->dca = ioat3_dca_init(pdev, device->reg_base);

	return 0;
}