GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/ccp/ccp_hardware.c
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2017 Chelsio Communications, Inc.
5
* Copyright (c) 2017 Conrad Meyer <[email protected]>
6
* All rights reserved.
7
* Largely borrowed from ccr(4), Written by: John Baldwin <[email protected]>
8
*
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
11
* are met:
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
14
* 2. Redistributions in binary form must reproduce the above copyright
15
* notice, this list of conditions and the following disclaimer in the
16
* documentation and/or other materials provided with the distribution.
17
*
18
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
* SUCH DAMAGE.
29
*/
30
31
#include <sys/cdefs.h>
32
#include "opt_ddb.h"
33
34
#include <sys/param.h>
35
#include <sys/bus.h>
36
#include <sys/lock.h>
37
#include <sys/kernel.h>
38
#include <sys/malloc.h>
39
#include <sys/mutex.h>
40
#include <sys/module.h>
41
#include <sys/rman.h>
42
#include <sys/sglist.h>
43
#include <sys/sysctl.h>
44
45
#ifdef DDB
46
#include <ddb/ddb.h>
47
#endif
48
49
#include <dev/pci/pcireg.h>
50
#include <dev/pci/pcivar.h>
51
52
#include <machine/bus.h>
53
#include <machine/resource.h>
54
#include <machine/vmparam.h>
55
56
#include <opencrypto/cryptodev.h>
57
#include <opencrypto/xform.h>
58
59
#include <vm/vm.h>
60
#include <vm/pmap.h>
61
62
#include "cryptodev_if.h"
63
64
#include "ccp.h"
65
#include "ccp_hardware.h"
66
#include "ccp_lsb.h"
67
68
CTASSERT(sizeof(struct ccp_desc) == 32);
69
70
static struct ccp_xts_unitsize_map_entry {
71
enum ccp_xts_unitsize cxu_id;
72
unsigned cxu_size;
73
} ccp_xts_unitsize_map[] = {
74
{ CCP_XTS_AES_UNIT_SIZE_16, 16 },
75
{ CCP_XTS_AES_UNIT_SIZE_512, 512 },
76
{ CCP_XTS_AES_UNIT_SIZE_1024, 1024 },
77
{ CCP_XTS_AES_UNIT_SIZE_2048, 2048 },
78
{ CCP_XTS_AES_UNIT_SIZE_4096, 4096 },
79
};
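/*
 * Note (illustrative, not part of the original source): the XTS engine only
 * handles requests whose payload length is exactly one of these data-unit
 * sizes -- e.g. a single 512-byte or 4096-byte disk sector per request.
 * Other lengths are rejected (EOPNOTSUPP/EINVAL) in ccp_do_blkcipher() and
 * ccp_do_xts() below.
 */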
80
81
SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
82
"ccp node");
83
84
unsigned g_ccp_ring_order = 11;
85
SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
86
0, "Set CCP ring order. (1 << this) == ring size. Min: 6, Max: 16");
87
88
/*
89
* Zero buffer, sufficient for padding LSB entries, that does not span a page
90
* boundary
91
*/
92
static const char g_zeroes[32] __aligned(32);
93
94
static inline uint32_t
95
ccp_read_4(struct ccp_softc *sc, uint32_t offset)
96
{
97
return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
98
}
99
100
static inline void
101
ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
102
{
103
bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
104
}
105
106
static inline uint32_t
107
ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
108
{
109
/*
110
* Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
111
*/
112
return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
113
}
114
115
static inline void
116
ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset,
117
uint32_t value)
118
{
119
ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value);
120
}
121
122
void
123
ccp_queue_write_tail(struct ccp_queue *qp)
124
{
125
ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
126
((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
127
}
128
129
/*
130
* Given a queue and a reserved LSB entry index, compute the LSB *entry id* of
131
* that entry for the queue's private LSB region.
132
*/
133
static inline uint8_t
134
ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry)
135
{
136
return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry));
137
}
138
139
/*
140
* Given a queue and a reserved LSB entry index, compute the LSB *address* of
141
* that entry for the queue's private LSB region.
142
*/
143
static inline uint32_t
144
ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry)
145
{
146
return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE);
147
}
148
149
/*
150
* Some terminology:
151
*
152
* LSB - Local Storage Block
153
* =========================
154
*
155
* 8 segments/regions, each containing 16 entries.
156
*
157
* Each entry contains 256 bits (32 bytes).
158
*
159
* Segments are virtually addressed in commands, but accesses cannot cross
160
* segment boundaries. Virtual map uses an identity mapping by default
161
* (virtual segment N corresponds to physical segment N).
162
*
163
* Access to a physical region can be restricted to any subset of all five
164
* queues.
165
*
166
* "Pass-through" mode
167
* ===================
168
*
169
* Pass-through is a generic DMA engine, much like ioat(4). Some nice
170
* features:
171
*
172
* - Supports byte-swapping for endian conversion (32- or 256-bit words)
173
* - AND, OR, XOR with fixed 256-bit mask
174
* - CRC32 of data (may be used in tandem with bswap, but not bit operations)
175
* - Read/write of LSB
176
* - Memset
177
*
178
* If bit manipulation mode is enabled, input must be a multiple of 256 bits
179
* (32 bytes).
180
*
181
* If byte-swapping is enabled, input must be a multiple of the word size.
182
*
183
* Zlib mode -- only usable from one queue at a time, single job at a time.
184
* ========================================================================
185
*
186
* Only usable from private host, aka PSP? Not host processor?
187
*
188
* RNG.
189
* ====
190
*
191
* Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in
192
* a ring buffer readable by software.
193
*
194
* NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are
195
* implemented on the raw input stream and may be enabled to verify min-entropy
196
* of 0.5 bits per bit.
197
*/
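/*
 * Worked example of the LSB geometry above (illustration only, assuming
 * LSB_REGION_LENGTH == 16 and LSB_ENTRY_SIZE == 32 as described): one
 * private region gives a queue LSB_REGION_LENGTH * LSB_ENTRY_SIZE ==
 * 16 * 32 == 512 bytes of on-chip scratch, addressed either by entry id
 * (ccp_queue_lsb_entry()) or by byte offset (ccp_queue_lsb_address()); the
 * whole LSB is 8 such regions, 4 kB total.
 */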
198
199
static void
200
ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
201
{
202
bus_addr_t *baddr;
203
204
KASSERT(error == 0, ("%s: error:%d", __func__, error));
205
baddr = arg;
206
*baddr = segs->ds_addr;
207
}
208
209
static int
210
ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
211
{
212
struct ccp_softc *sc;
213
struct ccp_queue *qp;
214
void *desc;
215
size_t ringsz, num_descriptors;
216
int error;
217
218
desc = NULL;
219
sc = device_get_softc(dev);
220
qp = &sc->queues[queue];
221
222
/*
223
* Don't bother allocating a ring for queues the host isn't allowed to
224
* drive.
225
*/
226
if ((sc->valid_queues & (1 << queue)) == 0)
227
return (0);
228
229
ccp_queue_decode_lsb_regions(sc, lsbmask, queue);
230
231
/* Ignore queues that do not have any LSB access. */
232
if (qp->lsb_mask == 0) {
233
device_printf(dev, "Ignoring queue %u with no LSB access\n",
234
queue);
235
sc->valid_queues &= ~(1 << queue);
236
return (0);
237
}
238
239
num_descriptors = 1 << sc->ring_size_order;
240
ringsz = sizeof(struct ccp_desc) * num_descriptors;
241
242
/*
243
* "Queue_Size" is order - 1.
244
*
245
* Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
246
*/
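/*
 * Example (for illustration): with the default order of 11 this requires
 * 1 << (5 + 11) == 64 kB alignment, which is also exactly the size of a
 * 2048-entry ring of 32-byte descriptors.
 */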
247
error = bus_dma_tag_create(bus_get_dma_tag(dev),
248
1 << (5 + sc->ring_size_order),
249
#if defined(__i386__) && !defined(PAE)
250
0, BUS_SPACE_MAXADDR,
251
#else
252
(bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
253
#endif
254
BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
255
ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
256
if (error != 0)
257
goto out;
258
259
error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
260
BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
261
if (error != 0)
262
goto out;
263
264
error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
265
ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
266
if (error != 0)
267
goto out;
268
269
qp->desc_ring = desc;
270
qp->completions_ring = malloc(num_descriptors *
271
sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);
272
273
/* Zero control register; among other things, clears the RUN flag. */
274
qp->qcontrol = 0;
275
ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
276
ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);
277
278
/* Clear any leftover interrupt status flags */
279
ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
280
ALL_INTERRUPTS);
281
282
qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;
283
284
ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
285
(uint32_t)qp->desc_ring_bus_addr);
286
ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
287
(uint32_t)qp->desc_ring_bus_addr);
288
289
/*
290
* Enable completion interrupts, as well as error or administrative
291
* halt interrupts. We don't use administrative halts, but they
292
* shouldn't trip unless we do, so it ought to be harmless.
293
*/
294
ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
295
INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
296
297
qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
298
qp->qcontrol |= CMD_Q_RUN;
299
ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
300
301
out:
302
if (error != 0) {
303
if (qp->desc_ring != NULL)
304
bus_dmamap_unload(qp->ring_desc_tag,
305
qp->ring_desc_map);
306
if (desc != NULL)
307
bus_dmamem_free(qp->ring_desc_tag, desc,
308
qp->ring_desc_map);
309
if (qp->ring_desc_tag != NULL)
310
bus_dma_tag_destroy(qp->ring_desc_tag);
311
}
312
return (error);
313
}
314
315
static void
316
ccp_hw_detach_queue(device_t dev, unsigned queue)
317
{
318
struct ccp_softc *sc;
319
struct ccp_queue *qp;
320
321
sc = device_get_softc(dev);
322
qp = &sc->queues[queue];
323
324
/*
325
* Don't bother allocating a ring for queues the host isn't allowed to
326
* drive.
327
*/
328
if ((sc->valid_queues & (1 << queue)) == 0)
329
return;
330
331
free(qp->completions_ring, M_CCP);
332
bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
333
bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
334
bus_dma_tag_destroy(qp->ring_desc_tag);
335
}
336
337
static int
338
ccp_map_pci_bar(device_t dev)
339
{
340
struct ccp_softc *sc;
341
342
sc = device_get_softc(dev);
343
344
sc->pci_resource_id = PCIR_BAR(2);
345
sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
346
&sc->pci_resource_id, RF_ACTIVE);
347
if (sc->pci_resource == NULL) {
348
device_printf(dev, "unable to allocate pci resource\n");
349
return (ENODEV);
350
}
351
352
sc->pci_resource_id_msix = PCIR_BAR(5);
353
sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
354
&sc->pci_resource_id_msix, RF_ACTIVE);
355
if (sc->pci_resource_msix == NULL) {
356
device_printf(dev, "unable to allocate pci resource msix\n");
357
bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
358
sc->pci_resource);
359
return (ENODEV);
360
}
361
362
sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
363
sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
364
return (0);
365
}
366
367
static void
368
ccp_unmap_pci_bar(device_t dev)
369
{
370
struct ccp_softc *sc;
371
372
sc = device_get_softc(dev);
373
374
bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
375
sc->pci_resource_msix);
376
bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
377
sc->pci_resource);
378
}
379
380
const static struct ccp_error_code {
381
uint8_t ce_code;
382
const char *ce_name;
383
int ce_errno;
384
const char *ce_desc;
385
} ccp_error_codes[] = {
386
{ 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" },
387
{ 0x03, "ILLEGAL_FUNCTION_TYPE", EIO,
388
"A non-supported function type was specified" },
389
{ 0x04, "ILLEGAL_FUNCTION_MODE", EIO,
390
"A non-supported function mode was specified" },
391
{ 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO,
392
"A CMAC type was specified when ENCRYPT was not specified" },
393
{ 0x06, "ILLEGAL_FUNCTION_SIZE", EIO,
394
"A non-supported function size was specified.\n"
395
"AES-CFB: Size was not 127 or 7;\n"
396
"3DES-CFB: Size was not 7;\n"
397
"RSA: See supported size table (7.4.2);\n"
398
"ECC: Size was greater than 576 bits." },
399
{ 0x07, "Zlib_MISSING_INIT_EOM", EIO,
400
"Zlib command does not have INIT and EOM set" },
401
{ 0x08, "ILLEGAL_FUNCTION_RSVD", EIO,
402
"Reserved bits in a function specification were not 0" },
403
{ 0x09, "ILLEGAL_BUFFER_LENGTH", EIO,
404
"The buffer length specified was not correct for the selected engine"
405
},
406
{ 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n"
407
"Undefined VLSB segment mapping or\n"
408
"mapping to unsupported LSB segment id" },
409
{ 0x0B, "ILLEGAL_MEM_ADDR", EFAULT,
410
"The specified source/destination buffer access was illegal:\n"
411
"Data buffer located in a LSB location disallowed by the LSB protection masks; or\n"
412
"Data buffer not completely contained within a single segment; or\n"
413
"Pointer with Fixed=1 is not 32-bit aligned; or\n"
414
"Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory."
415
},
416
{ 0x0C, "ILLEGAL_MEM_SEL", EIO,
417
"A src_mem, dst_mem, or key_mem field was illegal:\n"
418
"A field was set to a reserved value; or\n"
419
"A public command attempted to reference AXI1 (local) or GART memory; or\n"
420
"A Zlib command attmpted to use the LSB." },
421
{ 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO,
422
"The specified context location was illegal:\n"
423
"Context located in a LSB location disallowed by the LSB protection masks; or\n"
424
"Context not completely contained within a single segment." },
425
{ 0x0E, "ILLEGAL_KEY_ADDR", EIO,
426
"The specified key location was illegal:\n"
427
"Key located in a LSB location disallowed by the LSB protection masks; or\n"
428
"Key not completely contained within a single segment." },
429
{ 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" },
430
/* XXX Could fill out these descriptions too */
431
{ 0x13, "IDMA0_AXI_SLVERR", EIO, "" },
432
{ 0x14, "IDMA0_AXI_DECERR", EIO, "" },
433
{ 0x16, "IDMA1_AXI_SLVERR", EIO, "" },
434
{ 0x17, "IDMA1_AXI_DECERR", EIO, "" },
435
{ 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" },
436
{ 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" },
437
{ 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" },
438
{ 0x1D, "ZLIB_EXTRA_DATA", EIO, "" },
439
{ 0x1E, "ZLIB_BTYPE", EIO, "" },
440
{ 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" },
441
{ 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" },
442
{ 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" },
443
{ 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" },
444
{ 0x24, "ZLIB_LIMIT_REACHED", EIO, "" },
445
{ 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" },
446
{ 0x26, "ODMA0_AXI_SLVERR", EIO, "" },
447
{ 0x27, "ODMA0_AXI_DECERR", EIO, "" },
448
{ 0x29, "ODMA1_AXI_SLVERR", EIO, "" },
449
{ 0x2A, "ODMA1_AXI_DECERR", EIO, "" },
450
{ 0x2B, "LSB_PARITY_ERR", EIO,
451
"A read from the LSB encountered a parity error" },
452
};
453
454
static void
455
ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
456
{
457
struct ccp_completion_ctx *cctx;
458
const struct ccp_error_code *ec;
459
struct ccp_softc *sc;
460
uint32_t status, error, esource, faultblock;
461
unsigned q, idx;
462
int errno;
463
464
sc = qp->cq_softc;
465
q = qp->cq_qindex;
466
467
status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
468
469
error = status & STATUS_ERROR_MASK;
470
471
/* Decode error status */
472
ec = NULL;
473
for (idx = 0; idx < nitems(ccp_error_codes); idx++)
474
if (ccp_error_codes[idx].ce_code == error) {
475
ec = &ccp_error_codes[idx];
476
break;
477
}
478
479
esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
480
STATUS_ERRORSOURCE_MASK;
481
faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
482
STATUS_VLSB_FAULTBLOCK_MASK;
483
device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n",
484
(ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
485
faultblock);
486
if (ec != NULL)
487
device_printf(sc->dev, "Error description: %s\n", ec->ce_desc);
488
489
/* TODO Could format the desc nicely here */
490
idx = desc - qp->desc_ring;
491
DPRINTF(sc->dev, "Bad descriptor index: %u contents: %32D\n", idx,
492
(const void *)desc, " ");
493
494
/*
495
* TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
496
* Zlib Decompress status may be interesting.
497
*/
498
499
while (true) {
500
/* Keep unused descriptors zero for next use. */
501
memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx]));
502
503
cctx = &qp->completions_ring[idx];
504
505
/*
506
* Restart procedure described in § 14.2.5. Could be used by HoC if we
507
* used that.
508
*
509
* Advance HEAD_LO past bad descriptor + any remaining in
510
* transaction manually, then restart queue.
511
*/
512
idx = (idx + 1) % (1 << sc->ring_size_order);
513
514
/* Callback function signals end of transaction */
515
if (cctx->callback_fn != NULL) {
516
if (ec == NULL)
517
errno = EIO;
518
else
519
errno = ec->ce_errno;
520
/* TODO More specific error code */
521
cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno);
522
cctx->callback_fn = NULL;
523
break;
524
}
525
}
526
527
qp->cq_head = idx;
528
qp->cq_waiting = false;
529
wakeup(&qp->cq_tail);
530
DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
531
ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
532
(uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
533
ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
534
DPRINTF(sc->dev, "%s: Restarted queue\n", __func__);
535
}
536
537
static void
538
ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
539
{
540
struct ccp_completion_ctx *cctx;
541
struct ccp_softc *sc;
542
const struct ccp_desc *desc;
543
uint32_t headlo, idx;
544
unsigned q, completed;
545
546
sc = qp->cq_softc;
547
q = qp->cq_qindex;
548
549
mtx_lock(&qp->cq_lock);
550
551
/*
552
* Hardware HEAD_LO points to the first incomplete descriptor. Process
553
* any submitted and completed descriptors, up to but not including
554
* HEAD_LO.
555
*/
556
headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
557
idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
558
559
DPRINTF(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
560
qp->cq_head);
561
completed = 0;
562
while (qp->cq_head != idx) {
563
DPRINTF(sc->dev, "%s: completing:%u\n", __func__, qp->cq_head);
564
565
cctx = &qp->completions_ring[qp->cq_head];
566
if (cctx->callback_fn != NULL) {
567
cctx->callback_fn(qp, cctx->session,
568
cctx->callback_arg, 0);
569
cctx->callback_fn = NULL;
570
}
571
572
/* Keep unused descriptors zero for next use. */
573
memset(&qp->desc_ring[qp->cq_head], 0,
574
sizeof(qp->desc_ring[qp->cq_head]));
575
576
qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
577
completed++;
578
}
579
if (completed > 0) {
580
qp->cq_waiting = false;
581
wakeup(&qp->cq_tail);
582
}
583
584
DPRINTF(sc->dev, "%s: wrote sw head:%u\n", __func__, qp->cq_head);
585
586
/*
587
* Desc points to the first incomplete descriptor, at the time we read
588
* HEAD_LO. If there was an error flagged in interrupt status, the HW
589
* will not proceed past the erroneous descriptor by itself.
590
*/
591
desc = &qp->desc_ring[idx];
592
if ((ints & INT_ERROR) != 0)
593
ccp_intr_handle_error(qp, desc);
594
595
mtx_unlock(&qp->cq_lock);
596
}
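/*
 * Hypothetical helper (illustration only, not used by the driver): the
 * inverse of the HEAD_LO -> ring-index computation above, mirroring the
 * address math in ccp_queue_write_tail() and ccp_intr_handle_error().
 */
static inline uint32_t
ccp_queue_index_to_headlo(struct ccp_queue *qp, unsigned idx)
{
	/* Descriptors sit Q_DESC_SIZE bytes apart from the ring base address. */
	return ((uint32_t)qp->desc_ring_bus_addr + idx * Q_DESC_SIZE);
}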
597
598
static void
599
ccp_intr_handler(void *arg)
600
{
601
struct ccp_softc *sc = arg;
602
size_t i;
603
uint32_t ints;
604
605
DPRINTF(sc->dev, "%s: interrupt\n", __func__);
606
607
/*
608
* We get one global interrupt per PCI device, shared over all of
609
* its queues. Scan each valid queue on interrupt for flags indicating
610
* activity.
611
*/
612
for (i = 0; i < nitems(sc->queues); i++) {
613
if ((sc->valid_queues & (1 << i)) == 0)
614
continue;
615
616
ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
617
if (ints == 0)
618
continue;
619
620
#if 0
621
DPRINTF(sc->dev, "%s: %x interrupts on queue %zu\n", __func__,
622
(unsigned)ints, i);
623
#endif
624
/* Write back 1s to clear interrupt status bits. */
625
ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);
626
627
/*
628
* If there was an error, we still need to run completions on
629
* any descriptors prior to the error. The completions handler
630
* invoked below will also handle the error descriptor.
631
*/
632
if ((ints & (INT_COMPLETION | INT_ERROR)) != 0)
633
ccp_intr_run_completions(&sc->queues[i], ints);
634
635
if ((ints & INT_QUEUE_STOPPED) != 0)
636
device_printf(sc->dev, "%s: queue %zu stopped\n",
637
__func__, i);
638
}
639
640
/* Re-enable interrupts after processing */
641
for (i = 0; i < nitems(sc->queues); i++) {
642
if ((sc->valid_queues & (1 << i)) == 0)
643
continue;
644
ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE,
645
INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);
646
}
647
}
648
649
static int
650
ccp_intr_filter(void *arg)
651
{
652
struct ccp_softc *sc = arg;
653
size_t i;
654
655
/* TODO: Split individual queues into separate taskqueues? */
656
for (i = 0; i < nitems(sc->queues); i++) {
657
if ((sc->valid_queues & (1 << i)) == 0)
658
continue;
659
660
/* Mask interrupt until task completes */
661
ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0);
662
}
663
664
return (FILTER_SCHEDULE_THREAD);
665
}
666
667
static int
668
ccp_setup_interrupts(struct ccp_softc *sc)
669
{
670
uint32_t nvec;
671
int rid, error, n, ridcopy;
672
673
n = pci_msix_count(sc->dev);
674
if (n < 1) {
675
device_printf(sc->dev, "%s: msix_count: %d\n", __func__, n);
676
return (ENXIO);
677
}
678
679
nvec = n;
680
error = pci_alloc_msix(sc->dev, &nvec);
681
if (error != 0) {
682
device_printf(sc->dev, "%s: alloc_msix error: %d\n", __func__,
683
error);
684
return (error);
685
}
686
if (nvec < 1) {
687
device_printf(sc->dev, "%s: alloc_msix: 0 vectors\n",
688
__func__);
689
return (ENXIO);
690
}
691
if (nvec > nitems(sc->intr_res)) {
692
device_printf(sc->dev, "%s: too many vectors: %u\n", __func__,
693
nvec);
694
nvec = nitems(sc->intr_res);
695
}
696
697
for (rid = 1; rid < 1 + nvec; rid++) {
698
ridcopy = rid;
699
sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev,
700
SYS_RES_IRQ, &ridcopy, RF_ACTIVE);
701
if (sc->intr_res[rid - 1] == NULL) {
702
device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
703
__func__);
704
return (ENXIO);
705
}
706
707
sc->intr_tag[rid - 1] = NULL;
708
error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1],
709
INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter,
710
ccp_intr_handler, sc, &sc->intr_tag[rid - 1]);
711
if (error != 0)
712
device_printf(sc->dev, "%s: setup_intr: %d\n",
713
__func__, error);
714
}
715
sc->intr_count = nvec;
716
717
return (error);
718
}
719
720
static void
721
ccp_release_interrupts(struct ccp_softc *sc)
722
{
723
unsigned i;
724
725
for (i = 0; i < sc->intr_count; i++) {
726
if (sc->intr_tag[i] != NULL)
727
bus_teardown_intr(sc->dev, sc->intr_res[i],
728
sc->intr_tag[i]);
729
if (sc->intr_res[i] != NULL)
730
bus_release_resource(sc->dev, SYS_RES_IRQ,
731
rman_get_rid(sc->intr_res[i]), sc->intr_res[i]);
732
}
733
734
pci_release_msi(sc->dev);
735
}
736
737
int
738
ccp_hw_attach(device_t dev)
739
{
740
struct ccp_softc *sc;
741
uint64_t lsbmask;
742
uint32_t version, lsbmasklo, lsbmaskhi;
743
unsigned queue_idx, j;
744
int error;
745
bool bars_mapped, interrupts_setup;
746
747
queue_idx = 0;
748
bars_mapped = interrupts_setup = false;
749
sc = device_get_softc(dev);
750
751
error = ccp_map_pci_bar(dev);
752
if (error != 0) {
753
device_printf(dev, "%s: couldn't map BAR(s)\n", __func__);
754
goto out;
755
}
756
bars_mapped = true;
757
758
error = pci_enable_busmaster(dev);
759
if (error != 0) {
760
device_printf(dev, "%s: couldn't enable busmaster\n",
761
__func__);
762
goto out;
763
}
764
765
sc->ring_size_order = g_ccp_ring_order;
766
if (sc->ring_size_order < 6 || sc->ring_size_order > 16) {
767
device_printf(dev, "bogus hw.ccp.ring_order\n");
768
error = EINVAL;
769
goto out;
770
}
771
sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET);
772
773
version = ccp_read_4(sc, VERSION_REG);
774
if ((version & VERSION_NUM_MASK) < 5) {
775
device_printf(dev,
776
"driver supports version 5 and later hardware\n");
777
error = ENXIO;
778
goto out;
779
}
780
781
error = ccp_setup_interrupts(sc);
782
if (error != 0)
783
goto out;
784
interrupts_setup = true;
785
786
sc->hw_version = version & VERSION_NUM_MASK;
787
sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) &
788
VERSION_NUMVQM_MASK;
789
sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) &
790
VERSION_LSBSIZE_MASK;
791
sc->hw_features = version & VERSION_CAP_MASK;
792
793
/*
794
* Copy private LSB mask to public registers to enable access to LSB
795
* from all queues allowed by BIOS.
796
*/
797
lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET);
798
lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET);
799
ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo);
800
ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi);
801
802
lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo;
803
804
for (; queue_idx < nitems(sc->queues); queue_idx++) {
805
error = ccp_hw_attach_queue(dev, lsbmask, queue_idx);
806
if (error != 0) {
807
device_printf(dev, "%s: couldn't attach queue %u\n",
808
__func__, queue_idx);
809
goto out;
810
}
811
}
812
ccp_assign_lsb_regions(sc, lsbmask);
813
814
out:
815
if (error != 0) {
816
if (interrupts_setup)
817
ccp_release_interrupts(sc);
818
for (j = 0; j < queue_idx; j++)
819
ccp_hw_detach_queue(dev, j);
820
if (sc->ring_size_order != 0)
821
pci_disable_busmaster(dev);
822
if (bars_mapped)
823
ccp_unmap_pci_bar(dev);
824
}
825
return (error);
826
}
827
828
void
829
ccp_hw_detach(device_t dev)
830
{
831
struct ccp_softc *sc;
832
unsigned i;
833
834
sc = device_get_softc(dev);
835
836
for (i = 0; i < nitems(sc->queues); i++)
837
ccp_hw_detach_queue(dev, i);
838
839
ccp_release_interrupts(sc);
840
pci_disable_busmaster(dev);
841
ccp_unmap_pci_bar(dev);
842
}
843
844
static int __must_check
845
ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
846
enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
847
bus_size_t len, enum ccp_passthru_byteswap swapmode,
848
enum ccp_passthru_bitwise bitmode, bool interrupt,
849
const struct ccp_completion_ctx *cctx)
850
{
851
struct ccp_desc *desc;
852
853
if (ccp_queue_get_ring_space(qp) == 0)
854
return (EAGAIN);
855
856
desc = &qp->desc_ring[qp->cq_tail];
857
858
memset(desc, 0, sizeof(*desc));
859
desc->engine = CCP_ENGINE_PASSTHRU;
860
861
desc->pt.ioc = interrupt;
862
desc->pt.byteswap = swapmode;
863
desc->pt.bitwise = bitmode;
864
desc->length = len;
865
866
desc->src_lo = (uint32_t)src;
867
desc->src_hi = src >> 32;
868
desc->src_mem = src_type;
869
870
desc->dst_lo = (uint32_t)dst;
871
desc->dst_hi = dst >> 32;
872
desc->dst_mem = dst_type;
873
874
if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
875
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);
876
877
if (cctx != NULL)
878
memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));
879
880
qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
881
return (0);
882
}
883
884
static int __must_check
885
ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
886
struct sglist *sgl, bus_size_t len, bool interrupt,
887
const struct ccp_completion_ctx *cctx)
888
{
889
struct sglist_seg *seg;
890
size_t i, remain, nb;
891
int error;
892
893
remain = len;
894
for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
895
seg = &sgl->sg_segs[i];
896
/* crp lengths are int, so 32-bit min() is ok. */
897
nb = min(remain, seg->ss_len);
898
899
if (tolsb)
900
error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
901
seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
902
CCP_PASSTHRU_BYTESWAP_NOOP,
903
CCP_PASSTHRU_BITWISE_NOOP,
904
(nb == remain) && interrupt, cctx);
905
else
906
error = ccp_passthrough(qp, seg->ss_paddr,
907
CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb,
908
CCP_PASSTHRU_BYTESWAP_NOOP,
909
CCP_PASSTHRU_BITWISE_NOOP,
910
(nb == remain) && interrupt, cctx);
911
if (error != 0)
912
return (error);
913
914
remain -= nb;
915
}
916
return (0);
917
}
918
919
/*
920
* Note that these vectors are in reverse of the usual order.
921
*/
922
const struct SHA_vectors {
923
uint32_t SHA1[8];
924
uint32_t SHA224[8];
925
uint32_t SHA256[8];
926
uint64_t SHA384[8];
927
uint64_t SHA512[8];
928
} SHA_H __aligned(PAGE_SIZE) = {
929
.SHA1 = {
930
0xc3d2e1f0ul,
931
0x10325476ul,
932
0x98badcfeul,
933
0xefcdab89ul,
934
0x67452301ul,
935
0,
936
0,
937
0,
938
},
939
.SHA224 = {
940
0xbefa4fa4ul,
941
0x64f98fa7ul,
942
0x68581511ul,
943
0xffc00b31ul,
944
0xf70e5939ul,
945
0x3070dd17ul,
946
0x367cd507ul,
947
0xc1059ed8ul,
948
},
949
.SHA256 = {
950
0x5be0cd19ul,
951
0x1f83d9abul,
952
0x9b05688cul,
953
0x510e527ful,
954
0xa54ff53aul,
955
0x3c6ef372ul,
956
0xbb67ae85ul,
957
0x6a09e667ul,
958
},
959
.SHA384 = {
960
0x47b5481dbefa4fa4ull,
961
0xdb0c2e0d64f98fa7ull,
962
0x8eb44a8768581511ull,
963
0x67332667ffc00b31ull,
964
0x152fecd8f70e5939ull,
965
0x9159015a3070dd17ull,
966
0x629a292a367cd507ull,
967
0xcbbb9d5dc1059ed8ull,
968
},
969
.SHA512 = {
970
0x5be0cd19137e2179ull,
971
0x1f83d9abfb41bd6bull,
972
0x9b05688c2b3e6c1full,
973
0x510e527fade682d1ull,
974
0xa54ff53a5f1d36f1ull,
975
0x3c6ef372fe94f82bull,
976
0xbb67ae8584caa73bull,
977
0x6a09e667f3bcc908ull,
978
},
979
};
980
/*
981
* Ensure vectors do not cross a page boundary.
982
*
983
* Disabled due to a new Clang error: "expression is not an integral constant
984
* expression." GCC (cross toolchain) seems to handle this assertion with
985
* _Static_assert just fine.
986
*/
987
#if 0
988
CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H));
989
#endif
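/*
 * Illustrative reading of "reverse of the usual order" above: the canonical
 * first word of each IV appears last, e.g. SHA-256's standard
 * H0 == 0x6a09e667 is the final element of SHA_H.SHA256.
 */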
990
991
const struct SHA_Defn {
992
enum sha_version version;
993
const void *H_vectors;
994
size_t H_size;
995
const struct auth_hash *axf;
996
enum ccp_sha_type engine_type;
997
} SHA_definitions[] = {
998
{
999
.version = SHA1,
1000
.H_vectors = SHA_H.SHA1,
1001
.H_size = sizeof(SHA_H.SHA1),
1002
.axf = &auth_hash_hmac_sha1,
1003
.engine_type = CCP_SHA_TYPE_1,
1004
},
1005
#if 0
1006
{
1007
.version = SHA2_224,
1008
.H_vectors = SHA_H.SHA224,
1009
.H_size = sizeof(SHA_H.SHA224),
1010
.axf = &auth_hash_hmac_sha2_224,
1011
.engine_type = CCP_SHA_TYPE_224,
1012
},
1013
#endif
1014
{
1015
.version = SHA2_256,
1016
.H_vectors = SHA_H.SHA256,
1017
.H_size = sizeof(SHA_H.SHA256),
1018
.axf = &auth_hash_hmac_sha2_256,
1019
.engine_type = CCP_SHA_TYPE_256,
1020
},
1021
{
1022
.version = SHA2_384,
1023
.H_vectors = SHA_H.SHA384,
1024
.H_size = sizeof(SHA_H.SHA384),
1025
.axf = &auth_hash_hmac_sha2_384,
1026
.engine_type = CCP_SHA_TYPE_384,
1027
},
1028
{
1029
.version = SHA2_512,
1030
.H_vectors = SHA_H.SHA512,
1031
.H_size = sizeof(SHA_H.SHA512),
1032
.axf = &auth_hash_hmac_sha2_512,
1033
.engine_type = CCP_SHA_TYPE_512,
1034
},
1035
};
1036
1037
static int __must_check
1038
ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn,
1039
vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits)
1040
{
1041
struct ccp_desc *desc;
1042
1043
if (ccp_queue_get_ring_space(qp) == 0)
1044
return (EAGAIN);
1045
1046
desc = &qp->desc_ring[qp->cq_tail];
1047
1048
memset(desc, 0, sizeof(*desc));
1049
desc->engine = CCP_ENGINE_SHA;
1050
desc->som = start;
1051
desc->eom = end;
1052
1053
desc->sha.type = defn->engine_type;
1054
desc->length = len;
1055
1056
if (end) {
1057
desc->sha_len_lo = (uint32_t)msgbits;
1058
desc->sha_len_hi = msgbits >> 32;
1059
}
1060
1061
desc->src_lo = (uint32_t)addr;
1062
desc->src_hi = addr >> 32;
1063
desc->src_mem = CCP_MEMTYPE_SYSTEM;
1064
1065
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA);
1066
1067
qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
1068
return (0);
1069
}
1070
1071
static int __must_check
1072
ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
1073
struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
1074
{
1075
const struct SHA_Defn *defn;
1076
struct sglist_seg *seg;
1077
size_t i, msgsize, remaining, nb;
1078
uint32_t lsbaddr;
1079
int error;
1080
1081
for (i = 0; i < nitems(SHA_definitions); i++)
1082
if (SHA_definitions[i].version == version)
1083
break;
1084
if (i == nitems(SHA_definitions))
1085
return (EINVAL);
1086
defn = &SHA_definitions[i];
1087
1088
/* XXX validate input ??? */
1089
1090
/* Load initial SHA state into LSB */
1091
/* XXX ensure H_vectors don't span page boundaries */
1092
error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
1093
CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
1094
CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
1095
CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
1096
NULL);
1097
if (error != 0)
1098
return (error);
1099
1100
/* Execute series of SHA updates on correctly sized buffers */
1101
msgsize = 0;
1102
for (i = 0; i < sgl_src->sg_nseg; i++) {
1103
seg = &sgl_src->sg_segs[i];
1104
msgsize += seg->ss_len;
1105
error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
1106
seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
1107
msgsize << 3);
1108
if (error != 0)
1109
return (error);
1110
}
1111
1112
/* Copy result out to sgl_dst */
1113
remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
1114
lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
1115
for (i = 0; i < sgl_dst->sg_nseg; i++) {
1116
seg = &sgl_dst->sg_segs[i];
1117
/* crp lengths are int, so 32-bit min() is ok. */
1118
nb = min(remaining, seg->ss_len);
1119
1120
error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
1121
lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
1122
CCP_PASSTHRU_BITWISE_NOOP,
1123
(cctx != NULL) ? (nb == remaining) : false,
1124
(nb == remaining) ? cctx : NULL);
1125
if (error != 0)
1126
return (error);
1127
1128
remaining -= nb;
1129
lsbaddr += nb;
1130
if (remaining == 0)
1131
break;
1132
}
1133
1134
return (0);
1135
}
1136
1137
static void
1138
byteswap256(uint64_t *buffer)
1139
{
1140
uint64_t t;
1141
1142
t = bswap64(buffer[3]);
1143
buffer[3] = bswap64(buffer[0]);
1144
buffer[0] = t;
1145
1146
t = bswap64(buffer[2]);
1147
buffer[2] = bswap64(buffer[1]);
1148
buffer[1] = t;
1149
}
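/*
 * Net effect of byteswap256() (for reference): the 32-byte buffer is
 * reversed byte-for-byte -- the outer and inner 64-bit words trade places
 * while bswap64() reverses the bytes within each word.
 */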
1150
1151
/*
1152
* Translate CCP internal LSB hash format into a standard hash output.
1153
*
1154
* Manipulates input buffer with byteswap256 operation.
1155
*/
1156
static void
1157
ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
1158
{
1159
const struct SHA_Defn *defn;
1160
size_t i;
1161
1162
for (i = 0; i < nitems(SHA_definitions); i++)
1163
if (SHA_definitions[i].version == version)
1164
break;
1165
if (i == nitems(SHA_definitions))
1166
panic("bogus sha version auth_mode %u\n", (unsigned)version);
1167
1168
defn = &SHA_definitions[i];
1169
1170
/* Swap 256bit manually -- DMA engine can, but with limitations */
1171
byteswap256((void *)buffer);
1172
if (defn->axf->hashsize > LSB_ENTRY_SIZE)
1173
byteswap256((void *)(buffer + LSB_ENTRY_SIZE));
1174
1175
switch (defn->version) {
1176
case SHA1:
1177
memcpy(output, buffer + 12, defn->axf->hashsize);
1178
break;
1179
#if 0
1180
case SHA2_224:
1181
memcpy(output, buffer + XXX, defn->axf->hashsize);
1182
break;
1183
#endif
1184
case SHA2_256:
1185
memcpy(output, buffer, defn->axf->hashsize);
1186
break;
1187
case SHA2_384:
1188
memcpy(output,
1189
buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
1190
defn->axf->hashsize - LSB_ENTRY_SIZE);
1191
memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
1192
LSB_ENTRY_SIZE);
1193
break;
1194
case SHA2_512:
1195
memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
1196
memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
1197
break;
1198
}
1199
}
1200
1201
static void
1202
ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s,
1203
struct cryptop *crp, int error)
1204
{
1205
char ihash[SHA2_512_HASH_LEN /* max hash len */];
1206
union authctx auth_ctx;
1207
const struct auth_hash *axf;
1208
1209
axf = s->hmac.auth_hash;
1210
1211
s->pending--;
1212
1213
if (error != 0) {
1214
crp->crp_etype = error;
1215
goto out;
1216
}
1217
1218
/* Do remaining outer hash over small inner hash in software */
1219
axf->Init(&auth_ctx);
1220
axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize);
1221
ccp_sha_copy_result(ihash, s->hmac.res, s->hmac.auth_mode);
1222
#if 0
1223
INSECURE_DEBUG(dev, "%s sha intermediate=%64D\n", __func__,
1224
(u_char *)ihash, " ");
1225
#endif
1226
axf->Update(&auth_ctx, ihash, axf->hashsize);
1227
axf->Final(s->hmac.res, &auth_ctx);
1228
1229
if (crp->crp_op & CRYPTO_OP_VERIFY_DIGEST) {
1230
crypto_copydata(crp, crp->crp_digest_start, s->hmac.hash_len,
1231
ihash);
1232
if (timingsafe_bcmp(s->hmac.res, ihash, s->hmac.hash_len) != 0)
1233
crp->crp_etype = EBADMSG;
1234
} else
1235
crypto_copyback(crp, crp->crp_digest_start, s->hmac.hash_len,
1236
s->hmac.res);
1237
1238
/* Avoid leaking key material */
1239
explicit_bzero(&auth_ctx, sizeof(auth_ctx));
1240
explicit_bzero(s->hmac.res, sizeof(s->hmac.res));
1241
1242
out:
1243
crypto_done(crp);
1244
}
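/*
 * For reference (illustration, not driver code): the flow above computes
 * HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)).  The inner digest
 * H((K ^ ipad) || m) comes back from the CCP SHA engine in s->hmac.res,
 * and only the short outer hash is finished in software here.
 */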
1245
1246
static void
1247
ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1248
int error)
1249
{
1250
struct cryptop *crp;
1251
1252
crp = vcrp;
1253
ccp_do_hmac_done(qp, s, crp, error);
1254
}
1255
1256
static int __must_check
1257
ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1258
const struct ccp_completion_ctx *cctx)
1259
{
1260
device_t dev;
1261
const struct auth_hash *axf;
1262
int error;
1263
1264
dev = qp->cq_softc->dev;
1265
axf = s->hmac.auth_hash;
1266
1267
/*
1268
* Populate the SGL describing inside hash contents. We want to hash
1269
* the ipad (key XOR fixed bit pattern) concatenated with the user
1270
* data.
1271
*/
1272
sglist_reset(qp->cq_sg_ulptx);
1273
error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize);
1274
if (error != 0)
1275
return (error);
1276
if (crp->crp_aad_length != 0) {
1277
error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1278
crp->crp_aad_start, crp->crp_aad_length);
1279
if (error != 0)
1280
return (error);
1281
}
1282
error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1283
crp->crp_payload_start, crp->crp_payload_length);
1284
if (error != 0) {
1285
DPRINTF(dev, "%s: sglist too short\n", __func__);
1286
return (error);
1287
}
1288
/* Populate SGL for output -- use hmac.res buffer. */
1289
sglist_reset(qp->cq_sg_dst);
1290
error = sglist_append(qp->cq_sg_dst, s->hmac.res,
1291
roundup2(axf->hashsize, LSB_ENTRY_SIZE));
1292
if (error != 0)
1293
return (error);
1294
1295
error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst,
1296
cctx);
1297
if (error != 0) {
1298
DPRINTF(dev, "%s: ccp_sha error\n", __func__);
1299
return (error);
1300
}
1301
return (0);
1302
}
1303
1304
int __must_check
1305
ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1306
{
1307
struct ccp_completion_ctx ctx;
1308
1309
ctx.callback_fn = ccp_hmac_done;
1310
ctx.callback_arg = crp;
1311
ctx.session = s;
1312
1313
return (ccp_do_hmac(qp, s, crp, &ctx));
1314
}
1315
1316
static void
1317
ccp_byteswap(char *data, size_t len)
1318
{
1319
size_t i;
1320
char t;
1321
1322
len--;
1323
for (i = 0; i < len; i++, len--) {
1324
t = data[i];
1325
data[i] = data[len];
1326
data[len] = t;
1327
}
1328
}
1329
1330
static void
1331
ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1332
int error)
1333
{
1334
struct cryptop *crp;
1335
1336
explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1337
1338
crp = vcrp;
1339
1340
s->pending--;
1341
1342
if (error != 0)
1343
crp->crp_etype = error;
1344
1345
DPRINTF(qp->cq_softc->dev, "%s: qp=%p crp=%p\n", __func__, qp, crp);
1346
crypto_done(crp);
1347
}
1348
1349
static void
1350
ccp_collect_iv(struct cryptop *crp, const struct crypto_session_params *csp,
1351
char *iv)
1352
{
1353
1354
crypto_read_iv(crp, iv);
1355
1356
/*
1357
* Append an explicit counter of 1 for GCM.
1358
*/
1359
if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16)
1360
*(uint32_t *)&iv[12] = htobe32(1);
1361
1362
if (csp->csp_cipher_alg == CRYPTO_AES_XTS &&
1363
csp->csp_ivlen < AES_BLOCK_LEN)
1364
memset(&iv[csp->csp_ivlen], 0, AES_BLOCK_LEN - csp->csp_ivlen);
1365
1366
/* Reverse order of IV material for HW */
1367
INSECURE_DEBUG(NULL, "%s: IV: %16D len: %u\n", __func__, iv, " ",
1368
csp->csp_ivlen);
1369
1370
/*
1371
* For unknown reasons, XTS mode expects the IV in the reverse byte
1372
* order to every other AES mode.
1373
*/
1374
if (csp->csp_cipher_alg != CRYPTO_AES_XTS)
1375
ccp_byteswap(iv, AES_BLOCK_LEN);
1376
}
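/*
 * Example (for illustration): for AES-GCM with the usual 12-byte nonce N,
 * the IV leaving this routine is the byte-reversed form of N || 0x00000001,
 * i.e. the standard initial counter block laid out in the order the engine
 * expects.
 */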
1377
1378
static int __must_check
1379
ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src,
1380
size_t len)
1381
{
1382
int error;
1383
1384
sglist_reset(qp->cq_sg_ulptx);
1385
error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len);
1386
if (error != 0)
1387
return (error);
1388
1389
error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len,
1390
false, NULL);
1391
return (error);
1392
}
1393
1394
static int __must_check
1395
ccp_do_xts(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
1396
enum ccp_cipher_dir dir, const struct ccp_completion_ctx *cctx)
1397
{
1398
struct ccp_desc *desc;
1399
device_t dev;
1400
unsigned i;
1401
enum ccp_xts_unitsize usize;
1402
1403
/* IV and Key data are already loaded */
1404
1405
dev = qp->cq_softc->dev;
1406
1407
for (i = 0; i < nitems(ccp_xts_unitsize_map); i++)
1408
if (ccp_xts_unitsize_map[i].cxu_size ==
1409
crp->crp_payload_length) {
1410
usize = ccp_xts_unitsize_map[i].cxu_id;
1411
break;
1412
}
1413
if (i >= nitems(ccp_xts_unitsize_map))
1414
return (EINVAL);
1415
1416
for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1417
struct sglist_seg *seg;
1418
1419
seg = &qp->cq_sg_ulptx->sg_segs[i];
1420
1421
desc = &qp->desc_ring[qp->cq_tail];
1422
desc->engine = CCP_ENGINE_XTS_AES;
1423
desc->som = (i == 0);
1424
desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1425
desc->ioc = (desc->eom && cctx != NULL);
1426
DPRINTF(dev, "%s: XTS %u: som:%d eom:%d ioc:%d dir:%d\n",
1427
__func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1428
(int)desc->ioc, (int)dir);
1429
1430
if (desc->ioc)
1431
memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1432
sizeof(*cctx));
1433
1434
desc->aes_xts.encrypt = dir;
1435
desc->aes_xts.type = s->blkcipher.cipher_type;
1436
desc->aes_xts.size = usize;
1437
1438
DPRINTF(dev, "XXX %s: XTS %u: type:%u size:%u\n", __func__,
1439
qp->cq_tail, (unsigned)desc->aes_xts.type,
1440
(unsigned)desc->aes_xts.size);
1441
1442
desc->length = seg->ss_len;
1443
desc->src_lo = (uint32_t)seg->ss_paddr;
1444
desc->src_hi = (seg->ss_paddr >> 32);
1445
desc->src_mem = CCP_MEMTYPE_SYSTEM;
1446
1447
/* Crypt in-place */
1448
desc->dst_lo = desc->src_lo;
1449
desc->dst_hi = desc->src_hi;
1450
desc->dst_mem = desc->src_mem;
1451
1452
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1453
desc->key_hi = 0;
1454
desc->key_mem = CCP_MEMTYPE_SB;
1455
1456
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1457
1458
qp->cq_tail = (qp->cq_tail + 1) %
1459
(1 << qp->cq_softc->ring_size_order);
1460
}
1461
return (0);
1462
}
1463
1464
static int __must_check
1465
ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s,
1466
struct cryptop *crp, const struct ccp_completion_ctx *cctx)
1467
{
1468
const struct crypto_session_params *csp;
1469
struct ccp_desc *desc;
1470
char *keydata;
1471
device_t dev;
1472
enum ccp_cipher_dir dir;
1473
int error, iv_len;
1474
size_t keydata_len;
1475
unsigned i, j;
1476
1477
dev = qp->cq_softc->dev;
1478
1479
if (s->blkcipher.key_len == 0 || crp->crp_payload_length == 0) {
1480
DPRINTF(dev, "%s: empty\n", __func__);
1481
return (EINVAL);
1482
}
1483
if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) {
1484
DPRINTF(dev, "%s: len modulo: %d\n", __func__,
1485
crp->crp_payload_length);
1486
return (EINVAL);
1487
}
1488
1489
/*
1490
* Individual segments must be multiples of AES block size for the HW
1491
* to process it. Non-compliant inputs aren't bogus, just not doable
1492
* on this hardware.
1493
*/
1494
for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++)
1495
if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
1496
DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
1497
qp->cq_sg_crp->sg_segs[i].ss_len);
1498
return (EINVAL);
1499
}
1500
1501
/* Gather IV/nonce data */
1502
csp = crypto_get_params(crp->crp_session);
1503
ccp_collect_iv(crp, csp, s->blkcipher.iv);
1504
iv_len = csp->csp_ivlen;
1505
if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1506
iv_len = AES_BLOCK_LEN;
1507
1508
if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1509
dir = CCP_CIPHER_DIR_ENCRYPT;
1510
else
1511
dir = CCP_CIPHER_DIR_DECRYPT;
1512
1513
/* Set up passthrough op(s) to copy IV into LSB */
1514
error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
1515
s->blkcipher.iv, iv_len);
1516
if (error != 0)
1517
return (error);
1518
1519
/*
1520
* Initialize keydata and keydata_len for GCC. The default case of the
1521
* following switch is impossible to reach, but GCC doesn't know that.
1522
*/
1523
keydata_len = 0;
1524
keydata = NULL;
1525
1526
switch (csp->csp_cipher_alg) {
1527
case CRYPTO_AES_XTS:
1528
for (j = 0; j < nitems(ccp_xts_unitsize_map); j++)
1529
if (ccp_xts_unitsize_map[j].cxu_size ==
1530
crp->crp_payload_length)
1531
break;
1532
/* Input buffer must be a supported UnitSize */
1533
if (j >= nitems(ccp_xts_unitsize_map)) {
1534
device_printf(dev, "%s: rejected block size: %u\n",
1535
__func__, crp->crp_payload_length);
1536
return (EOPNOTSUPP);
1537
}
1538
/* FALLTHROUGH */
1539
case CRYPTO_AES_CBC:
1540
case CRYPTO_AES_ICM:
1541
keydata = s->blkcipher.enckey;
1542
keydata_len = s->blkcipher.key_len;
1543
break;
1544
}
1545
1546
INSECURE_DEBUG(dev, "%s: KEY(%zu): %16D\n", __func__, keydata_len,
1547
keydata, " ");
1548
if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1549
INSECURE_DEBUG(dev, "%s: KEY(XTS): %64D\n", __func__, keydata, " ");
1550
1551
/* Reverse order of key material for HW */
1552
ccp_byteswap(keydata, keydata_len);
1553
1554
/* Store key material into LSB to avoid page boundaries */
1555
if (csp->csp_cipher_alg == CRYPTO_AES_XTS) {
1556
/*
1557
* XTS mode uses 2 256-bit vectors for the primary key and the
1558
* tweak key. For 128-bit keys, the vectors are zero-padded.
1559
*
1560
* After byteswapping the combined OCF-provided K1:K2 vector
1561
* above, we need to reverse the order again so the hardware
1562
* gets the swapped keys in the order K1':K2'.
1563
*/
1564
error = ccp_do_pst_to_lsb(qp,
1565
ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1), keydata,
1566
keydata_len / 2);
1567
if (error != 0)
1568
return (error);
1569
error = ccp_do_pst_to_lsb(qp,
1570
ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
1571
keydata + (keydata_len / 2), keydata_len / 2);
1572
1573
/* Zero-pad 128 bit keys */
1574
if (keydata_len == 32) {
1575
if (error != 0)
1576
return (error);
1577
error = ccp_do_pst_to_lsb(qp,
1578
ccp_queue_lsb_address(qp, LSB_ENTRY_KEY) +
1579
keydata_len / 2, g_zeroes, keydata_len / 2);
1580
if (error != 0)
1581
return (error);
1582
error = ccp_do_pst_to_lsb(qp,
1583
ccp_queue_lsb_address(qp, LSB_ENTRY_KEY + 1) +
1584
keydata_len / 2, g_zeroes, keydata_len / 2);
1585
}
1586
} else
1587
error = ccp_do_pst_to_lsb(qp,
1588
ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), keydata,
1589
keydata_len);
1590
if (error != 0)
1591
return (error);
1592
1593
/*
1594
* Point SGLs at the subset of cryptop buffer contents representing the
1595
* data.
1596
*/
1597
sglist_reset(qp->cq_sg_ulptx);
1598
error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
1599
crp->crp_payload_start, crp->crp_payload_length);
1600
if (error != 0)
1601
return (error);
1602
1603
INSECURE_DEBUG(dev, "%s: Contents: %16D\n", __func__,
1604
(void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " ");
1605
1606
DPRINTF(dev, "%s: starting AES ops @ %u\n", __func__, qp->cq_tail);
1607
1608
if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1609
return (EAGAIN);
1610
1611
if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
1612
return (ccp_do_xts(qp, s, crp, dir, cctx));
1613
1614
for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1615
struct sglist_seg *seg;
1616
1617
seg = &qp->cq_sg_ulptx->sg_segs[i];
1618
1619
desc = &qp->desc_ring[qp->cq_tail];
1620
desc->engine = CCP_ENGINE_AES;
1621
desc->som = (i == 0);
1622
desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1);
1623
desc->ioc = (desc->eom && cctx != NULL);
1624
DPRINTF(dev, "%s: AES %u: som:%d eom:%d ioc:%d dir:%d\n",
1625
__func__, qp->cq_tail, (int)desc->som, (int)desc->eom,
1626
(int)desc->ioc, (int)dir);
1627
1628
if (desc->ioc)
1629
memcpy(&qp->completions_ring[qp->cq_tail], cctx,
1630
sizeof(*cctx));
1631
1632
desc->aes.encrypt = dir;
1633
desc->aes.mode = s->blkcipher.cipher_mode;
1634
desc->aes.type = s->blkcipher.cipher_type;
1635
if (csp->csp_cipher_alg == CRYPTO_AES_ICM)
1636
/*
1637
* Size of CTR value in bits, - 1. ICM mode uses all
1638
* 128 bits as counter.
1639
*/
1640
desc->aes.size = 127;
1641
1642
DPRINTF(dev, "%s: AES %u: mode:%u type:%u size:%u\n", __func__,
1643
qp->cq_tail, (unsigned)desc->aes.mode,
1644
(unsigned)desc->aes.type, (unsigned)desc->aes.size);
1645
1646
desc->length = seg->ss_len;
1647
desc->src_lo = (uint32_t)seg->ss_paddr;
1648
desc->src_hi = (seg->ss_paddr >> 32);
1649
desc->src_mem = CCP_MEMTYPE_SYSTEM;
1650
1651
/* Crypt in-place */
1652
desc->dst_lo = desc->src_lo;
1653
desc->dst_hi = desc->src_hi;
1654
desc->dst_mem = desc->src_mem;
1655
1656
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1657
desc->key_hi = 0;
1658
desc->key_mem = CCP_MEMTYPE_SB;
1659
1660
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1661
1662
qp->cq_tail = (qp->cq_tail + 1) %
1663
(1 << qp->cq_softc->ring_size_order);
1664
}
1665
return (0);
1666
}
1667
1668
int __must_check
1669
ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1670
{
1671
struct ccp_completion_ctx ctx;
1672
1673
ctx.callback_fn = ccp_blkcipher_done;
1674
ctx.session = s;
1675
ctx.callback_arg = crp;
1676
1677
return (ccp_do_blkcipher(qp, s, crp, &ctx));
1678
}
1679
1680
static void
1681
ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1682
int error)
1683
{
1684
struct cryptop *crp;
1685
1686
explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1687
1688
crp = vcrp;
1689
1690
ccp_do_hmac_done(qp, s, crp, error);
1691
}
1692
1693
int __must_check
1694
ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
1695
{
1696
struct ccp_completion_ctx ctx;
1697
int error;
1698
1699
ctx.callback_fn = ccp_authenc_done;
1700
ctx.session = s;
1701
ctx.callback_arg = crp;
1702
1703
/* Perform first operation */
1704
if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1705
error = ccp_do_blkcipher(qp, s, crp, NULL);
1706
else
1707
error = ccp_do_hmac(qp, s, crp, NULL);
1708
if (error != 0)
1709
return (error);
1710
1711
/* Perform second operation */
1712
if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1713
error = ccp_do_hmac(qp, s, crp, &ctx);
1714
else
1715
error = ccp_do_blkcipher(qp, s, crp, &ctx);
1716
return (error);
1717
}
1718
1719
static int __must_check
1720
ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s)
1721
{
1722
struct ccp_desc *desc;
1723
struct sglist_seg *seg;
1724
unsigned i;
1725
1726
if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg)
1727
return (EAGAIN);
1728
1729
for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
1730
seg = &qp->cq_sg_ulptx->sg_segs[i];
1731
1732
desc = &qp->desc_ring[qp->cq_tail];
1733
1734
desc->engine = CCP_ENGINE_AES;
1735
desc->aes.mode = CCP_AES_MODE_GHASH;
1736
desc->aes.type = s->blkcipher.cipher_type;
1737
desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD;
1738
1739
desc->som = (i == 0);
1740
desc->length = seg->ss_len;
1741
1742
desc->src_lo = (uint32_t)seg->ss_paddr;
1743
desc->src_hi = (seg->ss_paddr >> 32);
1744
desc->src_mem = CCP_MEMTYPE_SYSTEM;
1745
1746
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1747
1748
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1749
desc->key_mem = CCP_MEMTYPE_SB;
1750
1751
qp->cq_tail = (qp->cq_tail + 1) %
1752
(1 << qp->cq_softc->ring_size_order);
1753
}
1754
return (0);
1755
}
1756
1757
static int __must_check
1758
ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s,
1759
enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom)
1760
{
1761
struct ccp_desc *desc;
1762
1763
if (ccp_queue_get_ring_space(qp) == 0)
1764
return (EAGAIN);
1765
1766
desc = &qp->desc_ring[qp->cq_tail];
1767
1768
desc->engine = CCP_ENGINE_AES;
1769
desc->aes.mode = CCP_AES_MODE_GCTR;
1770
desc->aes.type = s->blkcipher.cipher_type;
1771
desc->aes.encrypt = dir;
1772
desc->aes.size = 8 * (seg->ss_len % GMAC_BLOCK_LEN) - 1;
1773
1774
desc->som = som;
1775
desc->eom = eom;
1776
1777
/* Trailing bytes will be masked off by aes.size above. */
1778
desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN);
1779
1780
desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr;
1781
desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32;
1782
desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM;
1783
1784
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1785
1786
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1787
desc->key_mem = CCP_MEMTYPE_SB;
1788
1789
qp->cq_tail = (qp->cq_tail + 1) %
1790
(1 << qp->cq_softc->ring_size_order);
1791
return (0);
1792
}
1793
1794
static int __must_check
1795
ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s)
1796
{
1797
struct ccp_desc *desc;
1798
1799
if (ccp_queue_get_ring_space(qp) == 0)
1800
return (EAGAIN);
1801
1802
desc = &qp->desc_ring[qp->cq_tail];
1803
1804
desc->engine = CCP_ENGINE_AES;
1805
desc->aes.mode = CCP_AES_MODE_GHASH;
1806
desc->aes.type = s->blkcipher.cipher_type;
1807
desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL;
1808
1809
desc->length = GMAC_BLOCK_LEN;
1810
1811
desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN);
1812
desc->src_mem = CCP_MEMTYPE_SB;
1813
1814
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV);
1815
1816
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY);
1817
desc->key_mem = CCP_MEMTYPE_SB;
1818
1819
desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH);
1820
desc->dst_mem = CCP_MEMTYPE_SB;
1821
1822
qp->cq_tail = (qp->cq_tail + 1) %
1823
(1 << qp->cq_softc->ring_size_order);
1824
return (0);
1825
}
1826
1827
static void
1828
ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp,
1829
int error)
1830
{
1831
char tag[GMAC_DIGEST_LEN];
1832
struct cryptop *crp;
1833
1834
crp = vcrp;
1835
1836
s->pending--;
1837
1838
if (error != 0) {
1839
crp->crp_etype = error;
1840
goto out;
1841
}
1842
1843
/* Encrypt is done. Decrypt needs to verify tag. */
1844
if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
1845
goto out;
1846
1847
/* Copy in message tag. */
1848
crypto_copydata(crp, crp->crp_digest_start, s->gmac.hash_len, tag);
1849
1850
/* Verify tag against computed GMAC */
1851
if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0)
1852
crp->crp_etype = EBADMSG;
1853
1854
out:
1855
explicit_bzero(&s->blkcipher.iv, sizeof(s->blkcipher.iv));
1856
explicit_bzero(&s->gmac.final_block, sizeof(s->gmac.final_block));
1857
crypto_done(crp);
1858
}
1859
1860
int __must_check
ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp)
{
	const struct crypto_session_params *csp;
	struct ccp_completion_ctx ctx;
	enum ccp_cipher_dir dir;
	device_t dev;
	unsigned i;
	int error;

	if (s->blkcipher.key_len == 0)
		return (EINVAL);

	dev = qp->cq_softc->dev;

	if (CRYPTO_OP_IS_ENCRYPT(crp->crp_op))
		dir = CCP_CIPHER_DIR_ENCRYPT;
	else
		dir = CCP_CIPHER_DIR_DECRYPT;

	/* Zero initial GHASH portion of context */
	memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv));

	/* Gather IV data */
	csp = crypto_get_params(crp->crp_session);
	ccp_collect_iv(crp, csp, s->blkcipher.iv);

	/* Reverse order of key material for HW */
	ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len);

	/* Prepare input buffer of concatenated lengths for final GHASH */
	be64enc(s->gmac.final_block, (uint64_t)crp->crp_aad_length * 8);
	be64enc(&s->gmac.final_block[8], (uint64_t)crp->crp_payload_length * 8);

	/* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
	    s->blkcipher.iv, 32);
	if (error != 0)
		return (error);
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY),
	    s->blkcipher.enckey, s->blkcipher.key_len);
	if (error != 0)
		return (error);
	error = ccp_do_pst_to_lsb(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block,
	    GMAC_BLOCK_LEN);
	if (error != 0)
		return (error);

	/* First step - compute GHASH over AAD */
	if (crp->crp_aad_length != 0) {
		sglist_reset(qp->cq_sg_ulptx);
		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
		    crp->crp_aad_start, crp->crp_aad_length);
		if (error != 0)
			return (error);

		/* This engine cannot process non-block multiple AAD data. */
		for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
			if ((qp->cq_sg_ulptx->sg_segs[i].ss_len %
			    GMAC_BLOCK_LEN) != 0) {
				DPRINTF(dev, "%s: AD seg modulo: %zu\n",
				    __func__,
				    qp->cq_sg_ulptx->sg_segs[i].ss_len);
				return (EINVAL);
			}

		error = ccp_do_ghash_aad(qp, s);
		if (error != 0)
			return (error);
	}

	/* Feed data piece by piece into GCTR */
	sglist_reset(qp->cq_sg_ulptx);
	error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
	    crp->crp_payload_start, crp->crp_payload_length);
	if (error != 0)
		return (error);

	/*
	 * All segments except the last must be even multiples of the AES
	 * block size for the HW to process them. Non-compliant inputs aren't
	 * bogus, just not doable on this hardware.
	 *
	 * XXX: Well, the hardware will produce a valid tag for shorter final
	 * segment inputs, but it will still write out a block-sized plaintext
	 * or ciphertext chunk. For a typical CRP this tramples trailing data,
	 * including the provided message tag. So, reject such inputs for now.
	 */
	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++)
		if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) {
			DPRINTF(dev, "%s: seg modulo: %zu\n", __func__,
			    qp->cq_sg_ulptx->sg_segs[i].ss_len);
			return (EINVAL);
		}

	for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) {
		struct sglist_seg *seg;

		seg = &qp->cq_sg_ulptx->sg_segs[i];
		error = ccp_do_gctr(qp, s, dir, seg,
		    (i == 0 && crp->crp_aad_length == 0),
		    i == (qp->cq_sg_ulptx->sg_nseg - 1));
		if (error != 0)
			return (error);
	}

	/* Send just initial IV (not GHASH!) to LSB again */
	error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV),
	    s->blkcipher.iv, AES_BLOCK_LEN);
	if (error != 0)
		return (error);

	ctx.callback_fn = ccp_gcm_done;
	ctx.session = s;
	ctx.callback_arg = crp;

	/* Compute final hash and copy result back */
	error = ccp_do_ghash_final(qp, s);
	if (error != 0)
		return (error);

	/* When encrypting, copy computed tag out to caller buffer. */
	sglist_reset(qp->cq_sg_ulptx);
	if (dir == CCP_CIPHER_DIR_ENCRYPT)
		error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp,
		    crp->crp_digest_start, s->gmac.hash_len);
	else
		/*
		 * For decrypting, copy the computed tag out to our session
		 * buffer to verify in our callback.
		 */
		error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block,
		    s->gmac.hash_len);
	if (error != 0)
		return (error);
	error = ccp_passthrough_sgl(qp,
	    ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx,
	    s->gmac.hash_len, true, &ctx);
	return (error);
}

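/*
 * Read entropy from the CCP's TRNG output register.  A 32-bit word that
 * reads as zero is treated as "no entropy available yet" and retried up to
 * MAX_TRNG_RETRIES times; if the register never produces a nonzero word,
 * the routine gives up and reports zero bytes harvested.
 */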
#define MAX_TRNG_RETRIES 10
u_int
random_ccp_read(void *v, u_int c)
{
	uint32_t *buf;
	u_int i, j;

	KASSERT(c % sizeof(*buf) == 0, ("%u not a multiple of uint32_t", c));

	buf = v;
	for (i = c; i > 0; i -= sizeof(*buf)) {
		for (j = 0; j < MAX_TRNG_RETRIES; j++) {
			*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET);
			if (*buf != 0)
				break;
		}
		if (j == MAX_TRNG_RETRIES)
			return (0);
		buf++;
	}
	return (c);
}

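/*
 * DDB(4) helpers: dump the controller-wide CCP registers and the hardware
 * state of a single command queue, decoding any pending error.
 */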
#ifdef DDB
void
db_ccp_show_hw(struct ccp_softc *sc)
{

	db_printf(" queue mask: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET));
	db_printf(" queue prio: 0x%x\n",
	    ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET));
	db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET));
	db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET));
	db_printf(" cmd timeout: 0x%x\n",
	    ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET));
	db_printf(" lsb public mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET));
	db_printf(" lsb public mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET));
	db_printf(" lsb private mask lo: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET));
	db_printf(" lsb private mask hi: 0x%x\n",
	    ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET));
	db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG));
}

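/*
 * Dump one command queue's control, pointer, and status registers.  If the
 * status word carries an error, decode it against ccp_error_codes[] and
 * print the offending descriptor, located by translating the queue head
 * pointer back into a ring index.
 */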
void
db_ccp_show_queue_hw(struct ccp_queue *qp)
{
	const struct ccp_error_code *ec;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock, headlo, qcontrol;
	unsigned q, i;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE);
	db_printf(" qcontrol: 0x%x%s%s\n", qcontrol,
	    (qcontrol & CMD_Q_RUN) ? " RUN" : "",
	    (qcontrol & CMD_Q_HALTED) ? " HALTED" : "");
	db_printf(" tail_lo: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE));
	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
	db_printf(" head_lo: 0x%x\n", headlo);
	db_printf(" int enable: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE));
	db_printf(" interrupt status: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE));
	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);
	db_printf(" status: 0x%x\n", status);
	db_printf(" int stats: 0x%x\n",
	    ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE));

	error = status & STATUS_ERROR_MASK;
	if (error == 0)
		return;

	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;

	ec = NULL;
	for (i = 0; i < nitems(ccp_error_codes); i++)
		if (ccp_error_codes[i].ce_code == error)
			break;
	if (i < nitems(ccp_error_codes))
		ec = &ccp_error_codes[i];

	db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n",
	    (ec != NULL) ? ec->ce_name : "(reserved)", error, esource,
	    faultblock);
	if (ec != NULL)
		db_printf(" Error description: %s\n", ec->ce_desc);

	i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;
	db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i,
	    (void *)&qp->desc_ring[i], " ");
}
#endif