GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/habanalabs/common/command_buffer.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#define CB_VA_POOL_SIZE (4UL * SZ_1G)

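/*
 * cb_map_mem() - map a CB into the device MMU.
 *
 * Allocates a device virtual address from the context's CB VA pool and maps
 * it to the CB's bus address, so the device can access the CB through its MMU.
 */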
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 page_size = prop->pmmu.page_size;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Mapping a CB to the device's MMU is not supported\n");
		return -EINVAL;
	}

	if (cb->is_mmu_mapped)
		return 0;

	cb->roundup_size = roundup(cb->size, page_size);

	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
	if (!cb->virtual_addr) {
		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
		return -ENOMEM;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
	if (rc) {
		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
		goto err_va_pool_free;
	}

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
	if (rc)
		goto err_mmu_unmap;

	mutex_unlock(&hdev->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_mmu_unmap:
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
err_va_pool_free:
	mutex_unlock(&hdev->mmu_lock);
	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);

	return rc;
}

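/*
 * cb_unmap_mem() - reverse cb_map_mem(): unmap the CB from the device MMU,
 * invalidate the MMU cache and return the virtual address to the VA pool.
 */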
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

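/*
 * cb_fini() - release the CB's backing memory (internal pool or DMA coherent
 * allocation) and free the CB structure itself.
 */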
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hl_asic_dma_free_coherent(hdev, cb->size, cb->kernel_address, cb->bus_address);

	kfree(cb);
}

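/*
 * cb_do_release() - return a pool CB to the device CB pool for reuse, or
 * fully free a non-pool CB.
 */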
static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		atomic_set(&cb->is_handle_destroyed, 0);
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

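/*
 * hl_cb_alloc() - allocate a new CB object and its backing memory.
 *
 * Internal CBs come from the device's internal CB pool, kernel-context CBs
 * from DMA coherent memory (preferring GFP_ATOMIC), and user CBs from DMA
 * coherent memory with GFP_USER | __GFP_ZERO.
 */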
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hl_asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d of dma memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}

struct hl_cb_mmap_mem_alloc_args {
	struct hl_device *hdev;
	struct hl_ctx *ctx;
	u32 cb_size;
	bool internal_cb;
	bool map_cb;
};

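/*
 * hl_cb_mmap_mem_release() - buffer release callback: remove the CB from
 * debugfs, unmap it from the device MMU if needed, drop the context
 * reference and release the CB.
 */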
static void hl_cb_mmap_mem_release(struct hl_mmap_mem_buf *buf)
{
	struct hl_cb *cb = buf->private;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(cb->hdev, cb);
}

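/*
 * hl_cb_mmap_mem_alloc() - buffer allocation callback: reuse a CB from the
 * device CB pool when possible, otherwise allocate a new one, then optionally
 * map it to the device MMU and register it in debugfs.
 */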
static int hl_cb_mmap_mem_alloc(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
{
	struct hl_cb_mmap_mem_alloc_args *cb_args = args;
	struct hl_cb *cb;
	int rc, ctx_id = cb_args->ctx->asid;
	bool alloc_new_cb = true;

	if (!cb_args->internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_args->cb_size < PAGE_SIZE)
			cb_args->cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_args->cb_size <= cb_args->hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&cb_args->hdev->cb_pool_lock);
			if (!list_empty(&cb_args->hdev->cb_pool)) {
				cb = list_first_entry(&cb_args->hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&cb_args->hdev->cb_pool_lock);
				dev_dbg(cb_args->hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(cb_args->hdev, cb_args->cb_size, ctx_id, cb_args->internal_cb);
		if (!cb)
			return -ENOMEM;
	}

	cb->hdev = cb_args->hdev;
	cb->ctx = cb_args->ctx;
	cb->buf = buf;
	cb->buf->mappable_size = cb->size;
	cb->buf->private = cb;

	hl_ctx_get(cb->ctx);

	if (cb_args->map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(cb_args->hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(cb_args->ctx, cb);
		if (rc)
			goto release_cb;
	}

	hl_debugfs_add_cb(cb);

	return 0;

release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(cb_args->hdev, cb);

	return rc;
}

static int hl_cb_mmap(struct hl_mmap_mem_buf *buf,
		      struct vm_area_struct *vma, void *args)
{
	struct hl_cb *cb = buf->private;

	return cb->hdev->asic_funcs->mmap(cb->hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
}

static struct hl_mmap_mem_buf_behavior cb_behavior = {
	.topic = "CB",
	.mem_id = HL_MMAP_TYPE_CB,
	.alloc = hl_cb_mmap_mem_alloc,
	.release = hl_cb_mmap_mem_release,
	.mmap = hl_cb_mmap,
};

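/*
 * hl_cb_create() - create a command buffer and return its memory-manager
 * handle. Fails if the device is disabled/in reset or if the requested size
 * exceeds 2MB.
 */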
int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb_mmap_mem_alloc_args args = {
		.hdev = hdev,
		.ctx = ctx,
		.cb_size = cb_size,
		.internal_cb = internal_cb,
		.map_cb = map_cb,
	};
	struct hl_mmap_mem_buf *buf;
	int ctx_id = ctx->asid;

	if ((hdev->disabled) || (hdev->reset_info.in_reset && (ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		return -EBUSY;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		return -EINVAL;
	}

	buf = hl_mmap_mem_buf_alloc(
		mmg, &cb_behavior,
		ctx_id == HL_KERNEL_ASID_ID ? GFP_ATOMIC : GFP_KERNEL, &args);
	if (!buf)
		return -ENOMEM;

	*handle = buf->handle;

	return 0;
}

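/*
 * hl_cb_destroy() - destroy a CB handle. The is_handle_destroyed flag
 * guarantees the handle is destroyed at most once; the underlying buffer is
 * refcounted and may outlive the handle while still in use.
 */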
int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
{
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_get(mmg, cb_handle);
	if (!cb) {
		dev_dbg(mmg->dev, "CB destroy failed, no CB was found for handle %#llx\n",
			cb_handle);
		return -EINVAL;
	}

	/* Make sure that CB handle isn't destroyed more than once */
	rc = atomic_cmpxchg(&cb->is_handle_destroyed, 0, 1);
	hl_cb_put(cb);
	if (rc) {
		dev_dbg(mmg->dev, "CB destroy failed, handle %#llx was already destroyed\n",
			cb_handle);
		return -EINVAL;
	}

	rc = hl_mmap_mem_buf_put_handle(mmg, cb_handle);
	if (rc < 0)
		return rc; /* Invalid handle */

	if (rc == 0)
		dev_dbg(mmg->dev, "CB 0x%llx is destroyed while still in use\n", cb_handle);

	return 0;
}

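/*
 * hl_cb_info() - query a CB: return either its device virtual address (if it
 * is mapped to the device MMU) or its command-submission usage count.
 */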
static int hl_cb_info(struct hl_mem_mgr *mmg,
			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
	struct hl_cb *cb;
	int rc = 0;

	cb = hl_cb_get(mmg, handle);
	if (!cb) {
		dev_err(mmg->dev,
			"CB info failed, no match to handle 0x%llx\n", handle);
		return -EINVAL;
	}

	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
		if (cb->is_mmu_mapped) {
			*device_va = cb->virtual_addr;
		} else {
			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
			rc = -EINVAL;
			goto out;
		}
	} else {
		*usage_cnt = atomic_read(&cb->cs_cnt);
	}

out:
	hl_cb_put(cb);
	return rc;
}

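/*
 * hl_cb_ioctl() - top-level handler for the CB IOCTL: dispatches create,
 * destroy and info operations requested from user space.
 */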
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
	struct hl_fpriv *hpriv = file_priv->driver_priv;
	struct hl_device *hdev = hpriv->hdev;
	union hl_cb_args *args = data;
	u64 handle = 0, device_va = 0;
	enum hl_device_status status;
	u32 usage_cnt = 0;
	int rc;

	if (!hl_device_operational(hdev, &status)) {
		dev_dbg_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			hdev->status[status]);
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->mem_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(&hpriv->mem_mgr,
					args->in.cb_handle);
		break;

	case HL_CB_OP_INFO:
		rc = hl_cb_info(&hpriv->mem_mgr, args->in.cb_handle,
				args->in.flags,
				&usage_cnt,
				&device_va);
		if (rc)
			break;

		memset(&args->out, 0, sizeof(args->out));

		if (args->in.flags & HL_CB_FLAGS_GET_DEVICE_VA)
			args->out.device_va = device_va;
		else
			args->out.usage_cnt = usage_cnt;
		break;

	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}

struct hl_cb *hl_cb_get(struct hl_mem_mgr *mmg, u64 handle)
{
	struct hl_mmap_mem_buf *buf;

	buf = hl_mmap_mem_buf_get(mmg, handle);
	if (!buf)
		return NULL;
	return buf->private;
}

void hl_cb_put(struct hl_cb *cb)
{
	hl_mmap_mem_buf_put(cb->buf);
}

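/*
 * hl_cb_kernel_create() - convenience wrapper for the kernel driver: create a
 * CB on the kernel context and return a pointer to it (with a reference held)
 * instead of a handle.
 */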
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb = hl_cb_get(&hdev->kernel_mem_mgr, cb_handle);
	/* hl_cb_get should never fail here */
	if (!cb) {
		dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
				(u32) cb_handle);
		goto destroy_cb;
	}

	return cb;

destroy_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return NULL;
}

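/*
 * hl_cb_pool_init() - pre-allocate a pool of kernel-context CBs that can be
 * reused instead of allocating a new CB on every request.
 */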
int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}

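/*
 * hl_cb_va_pool_init() - reserve a block of device virtual addresses for CB
 * mapping and expose it through a gen_pool, if the ASIC supports CB mapping.
 */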
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!ctx->cb_va_pool_base) {
		rc = -ENOMEM;
		goto err_pool_destroy;
	}
	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_unreserve_va_block;
	}

	return 0;

err_unreserve_va_block:
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}