/* torvalds/linux: drivers/crypto/intel/iaa/iaa_crypto_main.c */
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sysfs.h>
#include <linux/device.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/highmem.h>
#include <linux/sched/smt.h>
#include <crypto/internal/acompress.h>

#include "idxd.h"
#include "iaa_crypto.h"
#include "iaa_crypto_stats.h"

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt

#define IAA_ALG_PRIORITY 300

/* number of iaa instances probed */
static unsigned int nr_iaa;
static unsigned int nr_cpus;
static unsigned int nr_nodes;
static unsigned int nr_cpus_per_node;

/* Number of physical cpus sharing each iaa instance */
static unsigned int cpus_per_iaa;

/* Per-cpu lookup table for balanced wqs */
static struct wq_table_entry __percpu *wq_table;

static struct idxd_wq *wq_table_next_wq(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (++entry->cur_wq >= entry->n_wqs)
		entry->cur_wq = 0;

	if (!entry->wqs[entry->cur_wq])
		return NULL;

	pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__,
		 entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id,
		 entry->wqs[entry->cur_wq]->id, cpu);

	return entry->wqs[entry->cur_wq];
}

static void wq_table_add(int cpu, struct idxd_wq *wq)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	if (WARN_ON(entry->n_wqs == entry->max_wqs))
		return;

	entry->wqs[entry->n_wqs++] = wq;

	pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__,
		 entry->wqs[entry->n_wqs - 1]->idxd->id,
		 entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu);
}

static void wq_table_free_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	kfree(entry->wqs);
	memset(entry, 0, sizeof(*entry));
}

static void wq_table_clear_entry(int cpu)
{
	struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu);

	entry->n_wqs = 0;
	entry->cur_wq = 0;
	memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *));
}

LIST_HEAD(iaa_devices);
DEFINE_MUTEX(iaa_devices_lock);

/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */
static bool iaa_crypto_enabled;
static bool iaa_crypto_registered;

/* Verify results of IAA compress or not */
static bool iaa_verify_compress = true;

static ssize_t verify_compress_show(struct device_driver *driver, char *buf)
{
	return sysfs_emit(buf, "%d\n", iaa_verify_compress);
}

static ssize_t verify_compress_store(struct device_driver *driver,
				     const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = kstrtobool(buf, &iaa_verify_compress);
	if (ret)
		goto out;

	ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(verify_compress);

/*
 * The iaa crypto driver supports three 'sync' methods determining how
 * compressions and decompressions are performed:
 *
 * - sync:      the compression or decompression completes before
 *              returning.  This is the mode used by the async crypto
 *              interface when the sync mode is set to 'sync' and by
 *              the sync crypto interface regardless of setting.
 *
 * - async:     the compression or decompression is submitted and returns
 *              immediately.  Completion interrupts are not used so
 *              the caller is responsible for polling the descriptor
 *              for completion.  This mode is applicable to only the
 *              async crypto interface and is ignored for anything
 *              else.
 *
 * - async_irq: the compression or decompression is submitted and
 *              returns immediately.  Completion interrupts are
 *              enabled so the caller can wait for the completion and
 *              yield to other threads.  When the compression or
 *              decompression completes, the completion is signaled
 *              and the caller awakened.  This mode is applicable to
 *              only the async crypto interface and is ignored for
 *              anything else.
 *
 * These modes can be set using the iaa_crypto sync_mode driver
 * attribute.
 */
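
/*
 * Illustrative usage sketch (not part of the driver): the sync_mode and
 * verify_compress driver attributes defined in this file are normally
 * toggled from userspace via sysfs.  Assuming the sub-driver is bound
 * under the DSA bus with the "crypto" driver name, the paths would look
 * roughly like the following -- verify the exact location on your system:
 *
 *	# cat /sys/bus/dsa/drivers/crypto/sync_mode
 *	sync
 *	# echo async_irq > /sys/bus/dsa/drivers/crypto/sync_mode
 *	# echo 0 > /sys/bus/dsa/drivers/crypto/verify_compress
 *
 * Note that both stores return -EBUSY once iaa_crypto_enabled is set,
 * so the mode must be chosen before the first IAA wq is probed.
 */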

/* Use async mode */
static bool async_mode;
/* Use interrupts */
static bool use_irq;

/**
 * set_iaa_sync_mode - Set IAA sync mode
 * @name: The name of the sync mode
 *
 * Make the IAA sync mode named @name the current sync mode used by
 * compression/decompression.
 */

static int set_iaa_sync_mode(const char *name)
{
	int ret = 0;

	if (sysfs_streq(name, "sync")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async")) {
		async_mode = false;
		use_irq = false;
	} else if (sysfs_streq(name, "async_irq")) {
		async_mode = true;
		use_irq = true;
	} else {
		ret = -EINVAL;
	}

	return ret;
}

static ssize_t sync_mode_show(struct device_driver *driver, char *buf)
{
	int ret = 0;

	if (!async_mode && !use_irq)
		ret = sysfs_emit(buf, "%s\n", "sync");
	else if (async_mode && !use_irq)
		ret = sysfs_emit(buf, "%s\n", "async");
	else if (async_mode && use_irq)
		ret = sysfs_emit(buf, "%s\n", "async_irq");

	return ret;
}

static ssize_t sync_mode_store(struct device_driver *driver,
			       const char *buf, size_t count)
{
	int ret = -EBUSY;

	mutex_lock(&iaa_devices_lock);

	if (iaa_crypto_enabled)
		goto out;

	ret = set_iaa_sync_mode(buf);
	if (ret == 0)
		ret = count;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
}
static DRIVER_ATTR_RW(sync_mode);

static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX];

static int find_empty_iaa_compression_mode(void)
{
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++)
		if (!iaa_compression_modes[i])
			return i;

	return -EINVAL;
}

static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx)
{
	struct iaa_compression_mode *mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		if (!strcmp(mode->name, name)) {
			*idx = i;
			return iaa_compression_modes[i];
		}
	}

	return NULL;
}

static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
{
	kfree(mode->name);
	kfree(mode->ll_table);
	kfree(mode->d_table);

	kfree(mode);
}

/*
 * IAA Compression modes are defined by an ll_table and a d_table.
 * These tables are typically generated and captured using statistics
 * collected from running actual compress/decompress workloads.
 *
 * A module or other kernel code can add and remove compression modes
 * with a given name using the exported @add_iaa_compression_mode()
 * and @remove_iaa_compression_mode functions.
 *
 * When a new compression mode is added, the tables are saved in a
 * global compression mode list.  When IAA devices are added, a
 * per-IAA device dma mapping is created for each IAA device, for each
 * compression mode.  These are the tables used to do the actual
 * compression/decompression and are unmapped if/when the devices are
 * removed.  Currently, compression modes must be added before any
 * device is added, and removed after all devices have been removed.
 */
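
/*
 * Illustrative sketch (not part of the driver): a hypothetical module
 * supplying its own canned Huffman tables would call
 * add_iaa_compression_mode() before any IAA device is probed, and tear
 * the mode down after all devices are gone.  The table contents below
 * are placeholders, not real statistics; table sizes are in bytes, as
 * documented for add_iaa_compression_mode().
 *
 *	static const u32 example_ll_table[] = { 0x0, 0x0, 0x0, 0x0 };
 *	static const u32 example_d_table[] = { 0x0, 0x0, 0x0, 0x0 };
 *
 *	static int __init example_mode_init(void)
 *	{
 *		return add_iaa_compression_mode("example", example_ll_table,
 *						sizeof(example_ll_table),
 *						example_d_table,
 *						sizeof(example_d_table),
 *						NULL, NULL);
 *	}
 *
 *	static void __exit example_mode_exit(void)
 *	{
 *		remove_iaa_compression_mode("example");
 *	}
 */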

/**
 * remove_iaa_compression_mode - Remove an IAA compression mode
 * @name: The name the compression mode will be known as
 *
 * Remove the IAA compression mode named @name.
 */
void remove_iaa_compression_mode(const char *name)
{
	struct iaa_compression_mode *mode;
	int idx;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices))
		goto out;

	mode = find_iaa_compression_mode(name, &idx);
	if (mode) {
		free_iaa_compression_mode(mode);
		iaa_compression_modes[idx] = NULL;
	}
out:
	mutex_unlock(&iaa_devices_lock);
}
EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);

/**
 * add_iaa_compression_mode - Add an IAA compression mode
 * @name: The name the compression mode will be known as
 * @ll_table: The ll table
 * @ll_table_size: The ll table size in bytes
 * @d_table: The d table
 * @d_table_size: The d table size in bytes
 * @init: Optional callback function to init the compression mode data
 * @free: Optional callback function to free the compression mode data
 *
 * Add a new IAA compression mode named @name.
 *
 * Returns 0 if successful, errcode otherwise.
 */
int add_iaa_compression_mode(const char *name,
			     const u32 *ll_table,
			     int ll_table_size,
			     const u32 *d_table,
			     int d_table_size,
			     iaa_dev_comp_init_fn_t init,
			     iaa_dev_comp_free_fn_t free)
{
	struct iaa_compression_mode *mode;
	int idx, ret = -ENOMEM;

	mutex_lock(&iaa_devices_lock);

	if (!list_empty(&iaa_devices)) {
		ret = -EBUSY;
		goto out;
	}

	mode = kzalloc(sizeof(*mode), GFP_KERNEL);
	if (!mode)
		goto out;

	mode->name = kstrdup(name, GFP_KERNEL);
	if (!mode->name)
		goto free;

	if (ll_table) {
		mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
		if (!mode->ll_table)
			goto free;
		mode->ll_table_size = ll_table_size;
	}

	if (d_table) {
		mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
		if (!mode->d_table)
			goto free;
		mode->d_table_size = d_table_size;
	}

	mode->init = init;
	mode->free = free;

	idx = find_empty_iaa_compression_mode();
	if (idx < 0)
		goto free;

	pr_debug("IAA compression mode %s added at idx %d\n",
		 mode->name, idx);

	iaa_compression_modes[idx] = mode;

	ret = 0;
out:
	mutex_unlock(&iaa_devices_lock);

	return ret;
free:
	free_iaa_compression_mode(mode);
	goto out;
}
EXPORT_SYMBOL_GPL(add_iaa_compression_mode);

static struct iaa_device_compression_mode *
get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx)
{
	return iaa_device->compression_modes[idx];
}

static void free_device_compression_mode(struct iaa_device *iaa_device,
					 struct iaa_device_compression_mode *device_mode)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;

	kfree(device_mode->name);

	if (device_mode->aecs_comp_table)
		dma_free_coherent(dev, size, device_mode->aecs_comp_table,
				  device_mode->aecs_comp_table_dma_addr);
	kfree(device_mode);
}

#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000
#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC)
#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS)
#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \
			   IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \
			   IDXD_OP_FLAG_AECS_RW_TGLS)

static int check_completion(struct device *dev,
			    struct iax_completion_record *comp,
			    bool compress,
			    bool only_once);

static int init_device_compression_mode(struct iaa_device *iaa_device,
					struct iaa_compression_mode *mode,
					int idx, struct idxd_wq *wq)
{
	size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN;
	struct device *dev = &iaa_device->idxd->pdev->dev;
	struct iaa_device_compression_mode *device_mode;
	int ret = -ENOMEM;

	device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL);
	if (!device_mode)
		return -ENOMEM;

	device_mode->name = kstrdup(mode->name, GFP_KERNEL);
	if (!device_mode->name)
		goto free;

	device_mode->aecs_comp_table = dma_alloc_coherent(dev, size,
							  &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL);
	if (!device_mode->aecs_comp_table)
		goto free;

	/* Add Huffman table to aecs */
	memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
	memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
	memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);

	if (mode->init) {
		ret = mode->init(device_mode);
		if (ret)
			goto free;
	}

	/* mode index should match iaa_compression_modes idx */
	iaa_device->compression_modes[idx] = device_mode;

	pr_debug("IAA %s compression mode initialized for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	ret = 0;
out:
	return ret;
free:
	pr_debug("IAA %s compression mode initialization failed for iaa device %d\n",
		 mode->name, iaa_device->idxd->id);

	free_device_compression_mode(iaa_device, device_mode);
	goto out;
}

static int init_device_compression_modes(struct iaa_device *iaa_device,
					 struct idxd_wq *wq)
{
	struct iaa_compression_mode *mode;
	int i, ret = 0;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		mode = iaa_compression_modes[i];
		if (!mode)
			continue;

		ret = init_device_compression_mode(iaa_device, mode, i, wq);
		if (ret)
			break;
	}

	return ret;
}

static void remove_device_compression_modes(struct iaa_device *iaa_device)
{
	struct iaa_device_compression_mode *device_mode;
	int i;

	for (i = 0; i < IAA_COMP_MODES_MAX; i++) {
		device_mode = iaa_device->compression_modes[i];
		if (!device_mode)
			continue;

		if (iaa_compression_modes[i]->free)
			iaa_compression_modes[i]->free(device_mode);
		free_device_compression_mode(iaa_device, device_mode);
		iaa_device->compression_modes[i] = NULL;
	}
}

static struct iaa_device *iaa_device_alloc(void)
{
	struct iaa_device *iaa_device;

	iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL);
	if (!iaa_device)
		return NULL;

	INIT_LIST_HEAD(&iaa_device->wqs);

	return iaa_device;
}

static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq)
			return true;
	}

	return false;
}

static struct iaa_device *add_iaa_device(struct idxd_device *idxd)
{
	struct iaa_device *iaa_device;

	iaa_device = iaa_device_alloc();
	if (!iaa_device)
		return NULL;

	iaa_device->idxd = idxd;

	list_add_tail(&iaa_device->list, &iaa_devices);

	nr_iaa++;

	return iaa_device;
}

static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq)
{
	return init_device_compression_modes(iaa_device, iaa_wq->wq);
}

static void del_iaa_device(struct iaa_device *iaa_device)
{
	list_del(&iaa_device->list);

	nr_iaa--;
}

static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq,
		      struct iaa_wq **new_wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL);
	if (!iaa_wq)
		return -ENOMEM;

	iaa_wq->wq = wq;
	iaa_wq->iaa_device = iaa_device;
	idxd_wq_set_private(wq, iaa_wq);

	list_add_tail(&iaa_wq->list, &iaa_device->wqs);

	iaa_device->n_wq++;

	if (new_wq)
		*new_wq = iaa_wq;

	dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n",
		wq->id, iaa_device->idxd->id, iaa_device->n_wq);

	return 0;
}

static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq)
{
	struct idxd_device *idxd = iaa_device->idxd;
	struct pci_dev *pdev = idxd->pdev;
	struct device *dev = &pdev->dev;
	struct iaa_wq *iaa_wq;

	list_for_each_entry(iaa_wq, &iaa_device->wqs, list) {
		if (iaa_wq->wq == wq) {
			list_del(&iaa_wq->list);
			iaa_device->n_wq--;

			dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n",
				wq->id, iaa_device->idxd->id,
				iaa_device->n_wq, nr_iaa);

			if (iaa_device->n_wq == 0)
				del_iaa_device(iaa_device);
			break;
		}
	}
}

static void clear_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_clear_entry(cpu);

	pr_debug("cleared wq table\n");
}

static void free_iaa_device(struct iaa_device *iaa_device)
{
	if (!iaa_device)
		return;

	remove_device_compression_modes(iaa_device);
	kfree(iaa_device);
}

static void __free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct iaa_device *iaa_device;

	if (!iaa_wq)
		return;

	iaa_device = iaa_wq->iaa_device;
	if (iaa_device->n_wq == 0)
		free_iaa_device(iaa_wq->iaa_device);
}

static void free_iaa_wq(struct iaa_wq *iaa_wq)
{
	struct idxd_wq *wq;

	__free_iaa_wq(iaa_wq);

	wq = iaa_wq->wq;

	kfree(iaa_wq);
	idxd_wq_set_private(wq, NULL);
}

static int iaa_wq_get(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq && !iaa_wq->remove) {
		iaa_wq->ref++;
		idxd_wq_get(wq);
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);

	return ret;
}

static int iaa_wq_put(struct idxd_wq *wq)
{
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;
	int ret = 0;

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (iaa_wq) {
		iaa_wq->ref--;
		if (iaa_wq->ref == 0 && iaa_wq->remove) {
			idxd_wq_set_private(wq, NULL);
			free = true;
		}
		idxd_wq_put(wq);
	} else {
		ret = -ENODEV;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	return ret;
}

static void free_wq_table(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		wq_table_free_entry(cpu);

	free_percpu(wq_table);

	pr_debug("freed wq table\n");
}

static int alloc_wq_table(int max_wqs)
{
	struct wq_table_entry *entry;
	int cpu;

	wq_table = alloc_percpu(struct wq_table_entry);
	if (!wq_table)
		return -ENOMEM;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		entry = per_cpu_ptr(wq_table, cpu);
		entry->wqs = kcalloc(max_wqs, sizeof(*entry->wqs), GFP_KERNEL);
		if (!entry->wqs) {
			free_wq_table();
			return -ENOMEM;
		}

		entry->max_wqs = max_wqs;
	}

	pr_debug("initialized wq table\n");

	return 0;
}

static int save_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device, *found = NULL;
	struct idxd_device *idxd;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_device->idxd == wq->idxd) {
			idxd = iaa_device->idxd;
			pdev = idxd->pdev;
			dev = &pdev->dev;
			/*
			 * Check to see that we don't already have this wq.
			 * Shouldn't happen but we don't control probing.
			 */
			if (iaa_has_wq(iaa_device, wq)) {
				dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n",
					iaa_device);
				goto out;
			}

			found = iaa_device;

			ret = add_iaa_wq(iaa_device, wq, NULL);
			if (ret)
				goto out;

			break;
		}
	}

	if (!found) {
		struct iaa_device *new_device;
		struct iaa_wq *new_wq;

		new_device = add_iaa_device(wq->idxd);
		if (!new_device) {
			ret = -ENOMEM;
			goto out;
		}

		ret = add_iaa_wq(new_device, wq, &new_wq);
		if (ret) {
			del_iaa_device(new_device);
			free_iaa_device(new_device);
			goto out;
		}

		ret = init_iaa_device(new_device, new_wq);
		if (ret) {
			del_iaa_wq(new_device, new_wq->wq);
			del_iaa_device(new_device);
			free_iaa_wq(new_wq);
			goto out;
		}
	}

	if (WARN_ON(nr_iaa == 0))
		return -EINVAL;

	cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
	if (!cpus_per_iaa)
		cpus_per_iaa = 1;
out:
	return ret;
}

static void remove_iaa_wq(struct idxd_wq *wq)
{
	struct iaa_device *iaa_device;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		if (iaa_has_wq(iaa_device, wq)) {
			del_iaa_wq(iaa_device, wq);
			break;
		}
	}

	if (nr_iaa) {
		cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
		if (!cpus_per_iaa)
			cpus_per_iaa = 1;
	} else
		cpus_per_iaa = 1;
}

static int wq_table_add_wqs(int iaa, int cpu)
{
	struct iaa_device *iaa_device, *found_device = NULL;
	int ret = 0, cur_iaa = 0, n_wqs_added = 0;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;

	list_for_each_entry(iaa_device, &iaa_devices, list) {
		idxd = iaa_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;

		if (cur_iaa != iaa) {
			cur_iaa++;
			continue;
		}

		found_device = iaa_device;
		dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
		break;
	}

	if (!found_device) {
		found_device = list_first_entry_or_null(&iaa_devices,
							struct iaa_device, list);
		if (!found_device) {
			pr_debug("couldn't find any iaa devices with wqs!\n");
			ret = -EINVAL;
			goto out;
		}
		cur_iaa = 0;

		idxd = found_device->idxd;
		pdev = idxd->pdev;
		dev = &pdev->dev;
		dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n",
			found_device->idxd->id, cur_iaa);
	}

	list_for_each_entry(iaa_wq, &found_device->wqs, list) {
		wq_table_add(cpu, iaa_wq->wq);
		pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n",
			 cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id);
		n_wqs_added++;
	}

	if (!n_wqs_added) {
		pr_debug("couldn't find any iaa wqs!\n");
		ret = -EINVAL;
		goto out;
	}
out:
	return ret;
}

/*
 * Rebalance the wq table so that given a cpu, it's easy to find the
 * closest IAA instance.  The idea is to try to choose the most
 * appropriate IAA instance for a caller and spread available
 * workqueues around to clients.
 */
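/*
 * Worked example (illustrative numbers, single node): with one node of
 * 128 cpus and 4 probed IAA instances, save_iaa_wq() computes
 * cpus_per_iaa = (1 * 128) / 4 = 32, so the loop below assigns cpus
 * 0-31 to iaa 0, cpus 32-63 to iaa 1, and so on, each cpu getting the
 * wqs of exactly one IAA instance in its per-cpu wq_table entry.
 */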
static void rebalance_wq_table(void)
{
	const struct cpumask *node_cpus;
	int node_cpu, node, cpu, iaa = 0;

	if (nr_iaa == 0)
		return;

	pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n",
		 nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa);

	clear_wq_table();

	if (nr_iaa == 1) {
		for_each_possible_cpu(cpu) {
			if (WARN_ON(wq_table_add_wqs(0, cpu)))
				goto err;
		}

		return;
	}

	for_each_node_with_cpus(node) {
		cpu = 0;
		node_cpus = cpumask_of_node(node);

		for_each_cpu(node_cpu, node_cpus) {
			iaa = cpu / cpus_per_iaa;
			if (WARN_ON(wq_table_add_wqs(iaa, node_cpu)))
				goto err;
			cpu++;
		}
	}

	return;
err:
	pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu);
}

static inline int check_completion(struct device *dev,
				   struct iax_completion_record *comp,
				   bool compress,
				   bool only_once)
{
	char *op_str = compress ? "compress" : "decompress";
	int status_checks = 0;
	int ret = 0;

	while (!comp->status) {
		if (only_once)
			return -EAGAIN;
		cpu_relax();
		if (status_checks++ >= IAA_COMPLETION_TIMEOUT) {
			/* Something is wrong with the hw, disable it. */
			dev_err(dev, "%s completion timed out - "
				"assuming broken hw, iaa_crypto now DISABLED\n",
				op_str);
			iaa_crypto_enabled = false;
			ret = -ETIMEDOUT;
			goto out;
		}
	}

	if (comp->status != IAX_COMP_SUCCESS) {
		if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) {
			ret = -ETIMEDOUT;
			dev_dbg(dev, "%s timed out, size=0x%x\n",
				op_str, comp->output_size);
			update_completion_timeout_errs();
			goto out;
		}

		if (comp->status == IAA_ANALYTICS_ERROR &&
		    comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) {
			ret = -E2BIG;
			dev_dbg(dev, "compressed > uncompressed size,"
				" not compressing, size=0x%x\n",
				comp->output_size);
			update_completion_comp_buf_overflow_errs();
			goto out;
		}

		if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) {
			ret = -EOVERFLOW;
			goto out;
		}

		ret = -EINVAL;
		dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n",
			op_str, comp->status, comp->error_code, comp->output_size);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0);
		update_completion_einval_errs();

		goto out;
	}
out:
	return ret;
}

static int deflate_generic_decompress(struct acomp_req *req)
{
	ACOMP_FBREQ_ON_STACK(fbreq, req);
	int ret;

	ret = crypto_acomp_decompress(fbreq);
	req->dlen = fbreq->dlen;

	update_total_sw_decomp_calls();

	return ret;
}

static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr);

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen);

static void iaa_desc_complete(struct idxd_desc *idxd_desc,
			      enum idxd_complete_type comp_type,
			      bool free_desc, void *__ctx,
			      u32 *status)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_ctx *ctx = __ctx;
	struct iaa_device *iaa_device;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret, err = 0;

	compression_ctx = crypto_tfm_ctx(ctx->tfm);

	iaa_wq = idxd_wq_get_private(idxd_desc->wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device,
								  compression_ctx->mode);
	dev_dbg(dev, "%s: compression mode %s,"
		" ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__,
		active_compression_mode->name,
		ctx->src_addr, ctx->dst_addr);

	ret = check_completion(dev, idxd_desc->iax_completion,
			       ctx->compress, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (!ctx->compress &&
		    idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(ctx->req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				err = -EIO;
				goto err;
			}
		} else {
			err = -EIO;
			goto err;
		}
	} else {
		ctx->req->dlen = idxd_desc->iax_completion->output_size;
	}

	/* Update stats */
	if (ctx->compress) {
		update_total_comp_bytes_out(ctx->req->dlen);
		update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
	} else {
		update_total_decomp_bytes_in(ctx->req->slen);
		update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
	}

	if (ctx->compress && compression_ctx->verify_compress) {
		u32 *compression_crc = acomp_request_ctx(ctx->req);
		dma_addr_t src_addr, dst_addr;

		*compression_crc = idxd_desc->iax_completion->crc;

		ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			err = -EIO;
			goto out;
		}

		ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr,
					  ctx->req->slen, dst_addr, &ctx->req->dlen);
		if (ret) {
			dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret);
			err = -EIO;
		}

		dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE);

		goto out;
	}
err:
	dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE);
out:
	if (ret != 0)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	if (ctx->req->base.complete)
		acomp_request_complete(ctx->req, err);

	if (free_desc)
		idxd_free_desc(idxd_desc->wq, idxd_desc);
	iaa_wq_put(idxd_desc->wq);
}

static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
			struct idxd_wq *wq,
			dma_addr_t src_addr, unsigned int slen,
			dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *compression_crc = acomp_request_ctx(req);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR |
		IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_COMPRESS;
	desc->compr_flags = IAA_COMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->src1_size = slen;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr;
	desc->src2_size = sizeof(struct aecs_comp_table_record);
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = true;

		dev_dbg(dev, "%s use_async_irq: compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	}

	dev_dbg(dev, "%s: compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_comp_calls();
	update_wq_comp_calls(wq);

	if (ctx->async_mode) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, true, false);
	if (ret) {
		dev_dbg(dev, "check_completion failed ret=%d\n", ret);
		goto err;
	}

	*dlen = idxd_desc->iax_completion->output_size;

	/* Update stats */
	update_total_comp_bytes_out(*dlen);
	update_wq_comp_bytes(wq, *dlen);

	*compression_crc = idxd_desc->iax_completion->crc;

	if (!ctx->async_mode)
		idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq,
				struct acomp_req *req,
				dma_addr_t *src_addr, dma_addr_t *dst_addr)
{
	int ret = 0;
	int nr_sgs;

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	*src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);
		goto out;
	}
	*dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));
out:
	return ret;
}

static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req,
			       struct idxd_wq *wq,
			       dma_addr_t src_addr, unsigned int slen,
			       dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	u32 *compression_crc = acomp_request_ctx(req);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa compress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	/* Verify (optional) - decompress and check crc, suppress dest write */

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT;
	desc->priv = 0;

	desc->src1_addr = (u64)dst_addr;
	desc->src1_size = *dlen;
	desc->dst_addr = (u64)src_addr;
	desc->max_dst_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	dev_dbg(dev, "(verify) compression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n",
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret);
		goto err;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret);
		goto err;
	}

	if (*compression_crc != idxd_desc->iax_completion->crc) {
		ret = -EINVAL;
		dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:"
			" comp=0x%x, decomp=0x%x\n", *compression_crc,
			idxd_desc->iax_completion->crc);
		print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET,
			       8, 1, idxd_desc->iax_completion, 64, 0);
		goto err;
	}

	idxd_free_desc(wq, idxd_desc);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa compress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
			  struct idxd_wq *wq,
			  dma_addr_t src_addr, unsigned int slen,
			  dma_addr_t dst_addr, unsigned int *dlen)
{
	struct iaa_device_compression_mode *active_compression_mode;
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);
	struct iaa_device *iaa_device;
	struct idxd_desc *idxd_desc;
	struct iax_hw_desc *desc;
	struct idxd_device *idxd;
	struct iaa_wq *iaa_wq;
	struct pci_dev *pdev;
	struct device *dev;
	int ret = 0;

	iaa_wq = idxd_wq_get_private(wq);
	iaa_device = iaa_wq->iaa_device;
	idxd = iaa_device->idxd;
	pdev = idxd->pdev;
	dev = &pdev->dev;

	active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode);

	idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
	if (IS_ERR(idxd_desc)) {
		dev_dbg(dev, "idxd descriptor allocation failed\n");
		dev_dbg(dev, "iaa decompress failed: ret=%ld\n",
			PTR_ERR(idxd_desc));
		return PTR_ERR(idxd_desc);
	}
	desc = idxd_desc->iax_hw;

	desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC;
	desc->opcode = IAX_OPCODE_DECOMPRESS;
	desc->max_dst_size = PAGE_SIZE;
	desc->decompr_flags = IAA_DECOMP_FLAGS;
	desc->priv = 0;

	desc->src1_addr = (u64)src_addr;
	desc->dst_addr = (u64)dst_addr;
	desc->max_dst_size = *dlen;
	desc->src1_size = slen;
	desc->completion_addr = idxd_desc->compl_dma;

	if (ctx->use_irq) {
		desc->flags |= IDXD_OP_FLAG_RCI;

		idxd_desc->crypto.req = req;
		idxd_desc->crypto.tfm = tfm;
		idxd_desc->crypto.src_addr = src_addr;
		idxd_desc->crypto.dst_addr = dst_addr;
		idxd_desc->crypto.compress = false;

		dev_dbg(dev, "%s: use_async_irq compression mode %s,"
			" src_addr %llx, dst_addr %llx\n", __func__,
			active_compression_mode->name,
			src_addr, dst_addr);
	}

	dev_dbg(dev, "%s: decompression mode %s,"
		" desc->src1_addr %llx, desc->src1_size %d,"
		" desc->dst_addr %llx, desc->max_dst_size %d,"
		" desc->src2_addr %llx, desc->src2_size %d\n", __func__,
		active_compression_mode->name,
		desc->src1_addr, desc->src1_size, desc->dst_addr,
		desc->max_dst_size, desc->src2_addr, desc->src2_size);

	ret = idxd_submit_desc(wq, idxd_desc);
	if (ret) {
		dev_dbg(dev, "submit_desc failed ret=%d\n", ret);
		goto err;
	}

	/* Update stats */
	update_total_decomp_calls();
	update_wq_decomp_calls(wq);

	if (ctx->async_mode) {
		ret = -EINPROGRESS;
		dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__);
		goto out;
	}

	ret = check_completion(dev, idxd_desc->iax_completion, false, false);
	if (ret) {
		dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret);
		if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) {
			pr_warn("%s: falling back to deflate-generic decompress, "
				"analytics error code %x\n", __func__,
				idxd_desc->iax_completion->error_code);
			ret = deflate_generic_decompress(req);
			if (ret) {
				dev_dbg(dev, "%s: deflate-generic failed ret=%d\n",
					__func__, ret);
				goto err;
			}
		} else {
			goto err;
		}
	} else {
		req->dlen = idxd_desc->iax_completion->output_size;
	}

	*dlen = req->dlen;

	if (!ctx->async_mode)
		idxd_free_desc(wq, idxd_desc);

	/* Update stats */
	update_total_decomp_bytes_in(slen);
	update_wq_decomp_bytes(wq, slen);
out:
	return ret;
err:
	idxd_free_desc(wq, idxd_desc);
	dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret);

	goto out;
}

static int iaa_comp_acompress(struct acomp_req *req)
{
	struct iaa_compression_ctx *compression_ctx;
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct idxd_wq *wq;
	struct device *dev;

	compression_ctx = crypto_tfm_ctx(tfm);

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not compressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not compressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
			   &req->dlen);
	if (ret == -EINPROGRESS)
		return ret;

	if (!ret && compression_ctx->verify_compress) {
		ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr);
		if (ret) {
			dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret);
			goto out;
		}

		ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen,
					  dst_addr, &req->dlen);
		if (ret)
			dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret);

		dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE);
		dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE);

		goto out;
	}

	if (ret)
		dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}

static int iaa_comp_adecompress(struct acomp_req *req)
{
	struct crypto_tfm *tfm = req->base.tfm;
	dma_addr_t src_addr, dst_addr;
	int nr_sgs, cpu, ret = 0;
	struct iaa_wq *iaa_wq;
	struct device *dev;
	struct idxd_wq *wq;

	if (!iaa_crypto_enabled) {
		pr_debug("iaa_crypto disabled, not decompressing\n");
		return -ENODEV;
	}

	if (!req->src || !req->slen) {
		pr_debug("invalid src, not decompressing\n");
		return -EINVAL;
	}

	cpu = get_cpu();
	wq = wq_table_next_wq(cpu);
	put_cpu();
	if (!wq) {
		pr_debug("no wq configured for cpu=%d\n", cpu);
		return -ENODEV;
	}

	ret = iaa_wq_get(wq);
	if (ret) {
		pr_debug("no wq available for cpu=%d\n", cpu);
		return -ENODEV;
	}

	iaa_wq = idxd_wq_get_private(wq);

	dev = &wq->idxd->pdev->dev;

	nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map src sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto out;
	}
	src_addr = sg_dma_address(req->src);
	dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p,"
		" req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs,
		req->src, req->slen, sg_dma_len(req->src));

	nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
	if (nr_sgs <= 0 || nr_sgs > 1) {
		dev_dbg(dev, "couldn't map dst sg for iaa device %d,"
			" wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id,
			iaa_wq->wq->id, ret);
		ret = -EIO;
		goto err_map_dst;
	}
	dst_addr = sg_dma_address(req->dst);
	dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
		" req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
		req->dst, req->dlen, sg_dma_len(req->dst));

	ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
			     dst_addr, &req->dlen);
	if (ret == -EINPROGRESS)
		return ret;

	if (ret != 0)
		dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret);

	dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
err_map_dst:
	dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE);
out:
	iaa_wq_put(wq);

	return ret;
}

static void compression_ctx_init(struct iaa_compression_ctx *ctx)
{
	ctx->verify_compress = iaa_verify_compress;
	ctx->async_mode = async_mode;
	ctx->use_irq = use_irq;
}

static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm)
{
	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
	struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm);

	compression_ctx_init(ctx);

	ctx->mode = IAA_MODE_FIXED;

	return 0;
}

static struct acomp_alg iaa_acomp_fixed_deflate = {
	.init			= iaa_comp_init_fixed,
	.compress		= iaa_comp_acompress,
	.decompress		= iaa_comp_adecompress,
	.base			= {
		.cra_name		= "deflate",
		.cra_driver_name	= "deflate-iaa",
		.cra_flags		= CRYPTO_ALG_ASYNC,
		.cra_ctxsize		= sizeof(struct iaa_compression_ctx),
		.cra_reqsize		= sizeof(u32),
		.cra_module		= THIS_MODULE,
		.cra_priority		= IAA_ALG_PRIORITY,
	}
};
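
/*
 * Illustrative sketch (not part of the driver): a kernel client would
 * reach this algorithm through the generic acomp API, e.g. allocating
 * it by driver name to pin the IAA implementation.  Error handling is
 * trimmed and the scatterlists (src_sgl/dst_sgl) and lengths are
 * assumed to be set up by the caller.
 *
 *	struct crypto_acomp *tfm;
 *	struct acomp_req *req;
 *	int ret;
 *
 *	tfm = crypto_alloc_acomp("deflate-iaa", 0, 0);
 *	req = acomp_request_alloc(tfm);
 *	acomp_request_set_params(req, src_sgl, dst_sgl, slen, dlen);
 *	ret = crypto_acomp_compress(req);
 *	acomp_request_free(req);
 *	crypto_free_acomp(tfm);
 */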

static int iaa_register_compression_device(void)
{
	int ret;

	ret = crypto_register_acomp(&iaa_acomp_fixed_deflate);
	if (ret) {
		pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret);
		goto out;
	}

	iaa_crypto_registered = true;
out:
	return ret;
}

static void iaa_unregister_compression_device(void)
{
	if (iaa_crypto_registered)
		crypto_unregister_acomp(&iaa_acomp_fixed_deflate);
}

static int iaa_crypto_probe(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct idxd_driver_data *data = idxd->data;
	struct device *dev = &idxd_dev->conf_dev;
	bool first_wq = false;
	int ret = 0;

	if (idxd->state != IDXD_DEV_ENABLED)
		return -ENXIO;

	if (data->type != IDXD_TYPE_IAX)
		return -ENODEV;

	mutex_lock(&wq->wq_lock);

	if (idxd_wq_get_private(wq)) {
		mutex_unlock(&wq->wq_lock);
		return -EBUSY;
	}

	if (!idxd_wq_driver_name_match(wq, dev)) {
		dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n",
			idxd->id, wq->id, wq->driver_name, dev->driver->name);
		idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME;
		ret = -ENODEV;
		goto err;
	}

	wq->type = IDXD_WQT_KERNEL;

	ret = idxd_drv_enable_wq(wq);
	if (ret < 0) {
		dev_dbg(dev, "enable wq %d.%d failed: %d\n",
			idxd->id, wq->id, ret);
		ret = -ENXIO;
		goto err;
	}

	mutex_lock(&iaa_devices_lock);

	if (list_empty(&iaa_devices)) {
		ret = alloc_wq_table(wq->idxd->max_wqs);
		if (ret)
			goto err_alloc;
		first_wq = true;
	}

	ret = save_iaa_wq(wq);
	if (ret)
		goto err_save;

	rebalance_wq_table();

	if (first_wq) {
		iaa_crypto_enabled = true;
		ret = iaa_register_compression_device();
		if (ret != 0) {
			iaa_crypto_enabled = false;
			dev_dbg(dev, "IAA compression device registration failed\n");
			goto err_register;
		}
		try_module_get(THIS_MODULE);

		pr_info("iaa_crypto now ENABLED\n");
	}

	mutex_unlock(&iaa_devices_lock);
out:
	mutex_unlock(&wq->wq_lock);

	return ret;

err_register:
	remove_iaa_wq(wq);
	free_iaa_wq(idxd_wq_get_private(wq));
err_save:
	if (first_wq)
		free_wq_table();
err_alloc:
	mutex_unlock(&iaa_devices_lock);
	idxd_drv_disable_wq(wq);
err:
	wq->type = IDXD_WQT_NONE;

	goto out;
}

static void iaa_crypto_remove(struct idxd_dev *idxd_dev)
{
	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
	struct idxd_device *idxd = wq->idxd;
	struct iaa_wq *iaa_wq;
	bool free = false;

	idxd_wq_quiesce(wq);

	mutex_lock(&wq->wq_lock);
	mutex_lock(&iaa_devices_lock);

	remove_iaa_wq(wq);

	spin_lock(&idxd->dev_lock);
	iaa_wq = idxd_wq_get_private(wq);
	if (!iaa_wq) {
		spin_unlock(&idxd->dev_lock);
		pr_err("%s: no iaa_wq available to remove\n", __func__);
		goto out;
	}

	if (iaa_wq->ref) {
		iaa_wq->remove = true;
	} else {
		wq = iaa_wq->wq;
		idxd_wq_set_private(wq, NULL);
		free = true;
	}
	spin_unlock(&idxd->dev_lock);
	if (free) {
		__free_iaa_wq(iaa_wq);
		kfree(iaa_wq);
	}

	idxd_drv_disable_wq(wq);
	rebalance_wq_table();

	if (nr_iaa == 0) {
		iaa_crypto_enabled = false;
		free_wq_table();
		module_put(THIS_MODULE);

		pr_info("iaa_crypto now DISABLED\n");
	}
out:
	mutex_unlock(&iaa_devices_lock);
	mutex_unlock(&wq->wq_lock);
}

static enum idxd_dev_type dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver iaa_crypto_driver = {
	.probe = iaa_crypto_probe,
	.remove = iaa_crypto_remove,
	.name = IDXD_SUBDRIVER_NAME,
	.type = dev_types,
	.desc_complete = iaa_desc_complete,
};

static int __init iaa_crypto_init_module(void)
{
	int ret = 0;
	int node;

	nr_cpus = num_possible_cpus();
	for_each_node_with_cpus(node)
		nr_nodes++;
	if (!nr_nodes) {
		pr_err("IAA couldn't find any nodes with cpus\n");
		return -ENODEV;
	}
	nr_cpus_per_node = nr_cpus / nr_nodes;

	ret = iaa_aecs_init_fixed();
	if (ret < 0) {
		pr_debug("IAA fixed compression mode init failed\n");
		goto err_aecs_init;
	}

	ret = idxd_driver_register(&iaa_crypto_driver);
	if (ret) {
		pr_debug("IAA wq sub-driver registration failed\n");
		goto err_driver_reg;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_verify_compress);
	if (ret) {
		pr_debug("IAA verify_compress attr creation failed\n");
		goto err_verify_attr_create;
	}

	ret = driver_create_file(&iaa_crypto_driver.drv,
				 &driver_attr_sync_mode);
	if (ret) {
		pr_debug("IAA sync mode attr creation failed\n");
		goto err_sync_attr_create;
	}

	if (iaa_crypto_debugfs_init())
		pr_warn("debugfs init failed, stats not available\n");

	pr_debug("initialized\n");
out:
	return ret;

err_sync_attr_create:
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
err_verify_attr_create:
	idxd_driver_unregister(&iaa_crypto_driver);
err_driver_reg:
	iaa_aecs_cleanup_fixed();
err_aecs_init:

	goto out;
}

static void __exit iaa_crypto_cleanup_module(void)
{
	iaa_unregister_compression_device();

	iaa_crypto_debugfs_cleanup();
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_sync_mode);
	driver_remove_file(&iaa_crypto_driver.drv,
			   &driver_attr_verify_compress);
	idxd_driver_unregister(&iaa_crypto_driver);
	iaa_aecs_cleanup_fixed();

	pr_debug("cleaned up\n");
}

MODULE_IMPORT_NS("IDXD");
MODULE_LICENSE("GPL");
MODULE_ALIAS_IDXD_DEVICE(0);
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver");

module_init(iaa_crypto_init_module);
module_exit(iaa_crypto_cleanup_module);