Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_events.c
50693 views
1
// SPDX-License-Identifier: GPL-2.0 OR MIT
2
/*
3
* Copyright 2014-2022 Advanced Micro Devices, Inc.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice shall be included in
13
* all copies or substantial portions of the Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21
* OTHER DEALINGS IN THE SOFTWARE.
22
*/
23
24
#include <linux/mm_types.h>
25
#include <linux/slab.h>
26
#include <linux/types.h>
27
#include <linux/sched/signal.h>
28
#include <linux/sched/mm.h>
29
#include <linux/uaccess.h>
30
#include <linux/mman.h>
31
#include <linux/memory.h>
32
#include "kfd_priv.h"
33
#include "kfd_events.h"
34
#include "kfd_device_queue_manager.h"
35
#include <linux/device.h>
36
37
/*
38
* Wrapper around wait_queue_entry_t
39
*/
40
struct kfd_event_waiter {
41
wait_queue_entry_t wait;
42
struct kfd_event *event; /* Event to wait for */
43
bool activated; /* Becomes true when event is signaled */
44
bool event_age_enabled; /* set to true when last_event_age is non-zero */
45
};
46
47
/*
48
* Each signal event needs a 64-bit signal slot where the signaler will write
49
* a 1 before sending an interrupt. (This is needed because some interrupts
50
* do not contain enough spare data bits to identify an event.)
51
* We get whole pages and map them to the process VA.
52
* Individual signal events use their event_id as slot index.
53
*/
54
struct kfd_signal_page {
55
uint64_t *kernel_address;
56
uint64_t __user *user_address;
57
bool need_to_free_pages;
58
};
59
60
static uint64_t *page_slots(struct kfd_signal_page *page)
61
{
62
return page->kernel_address;
63
}
64
65
static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
66
{
67
void *backing_store;
68
struct kfd_signal_page *page;
69
70
page = kzalloc(sizeof(*page), GFP_KERNEL);
71
if (!page)
72
return NULL;
73
74
backing_store = (void *) __get_free_pages(GFP_KERNEL,
75
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
76
if (!backing_store)
77
goto fail_alloc_signal_store;
78
79
/* Initialize all events to unsignaled */
80
memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
81
KFD_SIGNAL_EVENT_LIMIT * 8);
82
83
page->kernel_address = backing_store;
84
page->need_to_free_pages = true;
85
pr_debug("Allocated new event signal page at %p, for process %p\n",
86
page, p);
87
88
return page;
89
90
fail_alloc_signal_store:
91
kfree(page);
92
return NULL;
93
}
94
95
static int allocate_event_notification_slot(struct kfd_process *p,
96
struct kfd_event *ev,
97
const int *restore_id)
98
{
99
int id;
100
101
if (!p->signal_page) {
102
p->signal_page = allocate_signal_page(p);
103
if (!p->signal_page)
104
return -ENOMEM;
105
/* Oldest user mode expects 256 event slots */
106
p->signal_mapped_size = 256*8;
107
}
108
109
if (restore_id) {
110
id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
111
GFP_KERNEL);
112
} else {
113
/*
114
* Compatibility with old user mode: Only use signal slots
115
* user mode has mapped, may be less than
116
* KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
117
* of the event limit without breaking user mode.
118
*/
119
id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
120
GFP_KERNEL);
121
}
122
if (id < 0)
123
return id;
124
125
ev->event_id = id;
126
page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
127
128
return 0;
129
}
130
131
/*
132
* Assumes that p->event_mutex or rcu_readlock is held and of course that p is
133
* not going away.
134
*/
135
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
136
{
137
return idr_find(&p->event_idr, id);
138
}
139
140
/**
141
* lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
142
* @p: Pointer to struct kfd_process
143
* @id: ID to look up
144
* @bits: Number of valid bits in @id
145
*
146
* Finds the first signaled event with a matching partial ID. If no
147
* matching signaled event is found, returns NULL. In that case the
148
* caller should assume that the partial ID is invalid and do an
149
* exhaustive search of all siglaned events.
150
*
151
* If multiple events with the same partial ID signal at the same
152
* time, they will be found one interrupt at a time, not necessarily
153
* in the same order the interrupts occurred. As long as the number of
154
* interrupts is correct, all signaled events will be seen by the
155
* driver.
156
*/
157
static struct kfd_event *lookup_signaled_event_by_partial_id(
158
struct kfd_process *p, uint32_t id, uint32_t bits)
159
{
160
struct kfd_event *ev;
161
162
if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
163
return NULL;
164
165
/* Fast path for the common case that @id is not a partial ID
166
* and we only need a single lookup.
167
*/
168
if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
169
if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
170
return NULL;
171
172
return idr_find(&p->event_idr, id);
173
}
174
175
/* General case for partial IDs: Iterate over all matching IDs
176
* and find the first one that has signaled.
177
*/
178
for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
179
if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
180
continue;
181
182
ev = idr_find(&p->event_idr, id);
183
}
184
185
return ev;
186
}
187
188
static int create_signal_event(struct file *devkfd, struct kfd_process *p,
189
struct kfd_event *ev, const int *restore_id)
190
{
191
int ret;
192
193
if (p->signal_mapped_size &&
194
p->signal_event_count == p->signal_mapped_size / 8) {
195
if (!p->signal_event_limit_reached) {
196
pr_debug("Signal event wasn't created because limit was reached\n");
197
p->signal_event_limit_reached = true;
198
}
199
return -ENOSPC;
200
}
201
202
ret = allocate_event_notification_slot(p, ev, restore_id);
203
if (ret) {
204
pr_warn("Signal event wasn't created because out of kernel memory\n");
205
return ret;
206
}
207
208
p->signal_event_count++;
209
210
ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
211
pr_debug("Signal event number %zu created with id %d, address %p\n",
212
p->signal_event_count, ev->event_id,
213
ev->user_signal_address);
214
215
return 0;
216
}
217
218
static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
219
{
220
int id;
221
222
if (restore_id)
223
id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
224
GFP_KERNEL);
225
else
226
/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
227
* intentional integer overflow to -1 without a compiler
228
* warning. idr_alloc treats a negative value as "maximum
229
* signed integer".
230
*/
231
id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
232
(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
233
GFP_KERNEL);
234
235
if (id < 0)
236
return id;
237
ev->event_id = id;
238
239
return 0;
240
}
241
242
int kfd_event_init_process(struct kfd_process *p)
243
{
244
int id;
245
246
mutex_init(&p->event_mutex);
247
idr_init(&p->event_idr);
248
p->signal_page = NULL;
249
p->signal_event_count = 1;
250
/* Allocate event ID 0. It is used for a fast path to ignore bogus events
251
* that are sent by the CP without a context ID
252
*/
253
id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
254
if (id < 0) {
255
idr_destroy(&p->event_idr);
256
mutex_destroy(&p->event_mutex);
257
return id;
258
}
259
return 0;
260
}
261
262
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
263
{
264
struct kfd_event_waiter *waiter;
265
266
/* Wake up pending waiters. They will return failure */
267
spin_lock(&ev->lock);
268
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
269
WRITE_ONCE(waiter->event, NULL);
270
wake_up_all(&ev->wq);
271
spin_unlock(&ev->lock);
272
273
if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
274
ev->type == KFD_EVENT_TYPE_DEBUG)
275
p->signal_event_count--;
276
277
idr_remove(&p->event_idr, ev->event_id);
278
kfree_rcu(ev, rcu);
279
}
280
281
static void destroy_events(struct kfd_process *p)
282
{
283
struct kfd_event *ev;
284
uint32_t id;
285
286
idr_for_each_entry(&p->event_idr, ev, id)
287
if (ev)
288
destroy_event(p, ev);
289
idr_destroy(&p->event_idr);
290
mutex_destroy(&p->event_mutex);
291
}
292
293
/*
294
* We assume that the process is being destroyed and there is no need to
295
* unmap the pages or keep bookkeeping data in order.
296
*/
297
static void shutdown_signal_page(struct kfd_process *p)
298
{
299
struct kfd_signal_page *page = p->signal_page;
300
301
if (page) {
302
if (page->need_to_free_pages)
303
free_pages((unsigned long)page->kernel_address,
304
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
305
kfree(page);
306
}
307
}
308
309
/*
 * Free all event state of process @p: first every registered event,
 * then the signal page.
 */
void kfd_event_free_process(struct kfd_process *p)
{
	destroy_events(p);
	shutdown_signal_page(p);
}
314
315
static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
316
{
317
return ev->type == KFD_EVENT_TYPE_SIGNAL ||
318
ev->type == KFD_EVENT_TYPE_DEBUG;
319
}
320
321
static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
322
{
323
return ev->type == KFD_EVENT_TYPE_SIGNAL;
324
}
325
326
/*
 * Install an externally provided buffer as the signal page of @p.
 *
 * @p:              process to attach the page to
 * @kernel_address: kernel mapping of the buffer
 * @size:           size of the mapped buffer in bytes
 * @user_handle:    handle stored in p->signal_handle
 *
 * The whole slot array (KFD_SIGNAL_EVENT_LIMIT 64-bit slots) is
 * initialised to the unsignaled pattern, so the buffer must be at least
 * that large.
 *
 * Returns 0 on success, -EBUSY if a signal page is already set, -EINVAL
 * if the buffer is too small to hold the slot array, or -ENOMEM if the
 * descriptor cannot be allocated.
 */
static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
			      uint64_t size, uint64_t user_handle)
{
	struct kfd_signal_page *page;

	if (p->signal_page)
		return -EBUSY;

	/*
	 * The memset below always writes KFD_SIGNAL_EVENT_LIMIT * 8 bytes;
	 * refuse buffers that are smaller instead of writing past their end.
	 */
	if (size < KFD_SIGNAL_EVENT_LIMIT * 8)
		return -EINVAL;

	page = kzalloc(sizeof(*page), GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* Initialize all events to unsignaled */
	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
	       KFD_SIGNAL_EVENT_LIMIT * 8);

	page->kernel_address = kernel_address;

	p->signal_page = page;
	p->signal_mapped_size = size;
	p->signal_handle = user_handle;
	return 0;
}
349
350
/*
 * Map the buffer object identified by @event_page_offset into the kernel
 * and install it as the signal page of @p.
 *
 * @event_page_offset encodes both a GPU ID (GET_GPU_ID) and an IDR handle
 * (GET_IDR_HANDLE) of the buffer object.
 *
 * Returns 0 on success or a negative error code; -EINVAL is returned if
 * a signal page is already set, or the device or buffer object cannot be
 * found.
 */
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
{
	struct kfd_process_device *pdd;
	struct kfd_node *node;
	void *bo, *kaddr;
	uint64_t bo_size;
	int err;

	if (p->signal_page) {
		pr_err("Event page is already set\n");
		return -EINVAL;
	}

	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
	if (!pdd) {
		pr_err("Getting device by id failed in %s\n", __func__);
		return -EINVAL;
	}
	node = pdd->dev;

	pdd = kfd_bind_process_to_device(node, p);
	if (IS_ERR(pdd))
		return PTR_ERR(pdd);

	bo = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(event_page_offset));
	if (!bo) {
		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
		return -EINVAL;
	}

	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(bo, &kaddr, &bo_size);
	if (err) {
		pr_err("Failed to map event page to kernel\n");
		return err;
	}

	err = kfd_event_page_set(p, kaddr, bo_size, event_page_offset);
	if (!err)
		return 0;

	/* Undo the kernel mapping made above */
	pr_err("Failed to set event page\n");
	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(bo);
	return err;
}
395
396
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
397
uint32_t event_type, bool auto_reset, uint32_t node_id,
398
uint32_t *event_id, uint32_t *event_trigger_data,
399
uint64_t *event_page_offset, uint32_t *event_slot_index)
400
{
401
int ret = 0;
402
struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);
403
404
if (!ev)
405
return -ENOMEM;
406
407
ev->type = event_type;
408
ev->auto_reset = auto_reset;
409
ev->signaled = false;
410
411
spin_lock_init(&ev->lock);
412
init_waitqueue_head(&ev->wq);
413
414
*event_page_offset = 0;
415
416
mutex_lock(&p->event_mutex);
417
418
switch (event_type) {
419
case KFD_EVENT_TYPE_SIGNAL:
420
case KFD_EVENT_TYPE_DEBUG:
421
ret = create_signal_event(devkfd, p, ev, NULL);
422
if (!ret) {
423
*event_page_offset = KFD_MMAP_TYPE_EVENTS;
424
*event_slot_index = ev->event_id;
425
}
426
break;
427
default:
428
ret = create_other_event(p, ev, NULL);
429
break;
430
}
431
432
if (!ret) {
433
*event_id = ev->event_id;
434
*event_trigger_data = ev->event_id;
435
ev->event_age = 1;
436
} else {
437
kfree(ev);
438
}
439
440
mutex_unlock(&p->event_mutex);
441
442
return ret;
443
}
444
445
int kfd_criu_restore_event(struct file *devkfd,
446
struct kfd_process *p,
447
uint8_t __user *user_priv_ptr,
448
uint64_t *priv_data_offset,
449
uint64_t max_priv_data_size)
450
{
451
struct kfd_criu_event_priv_data *ev_priv;
452
struct kfd_event *ev = NULL;
453
int ret = 0;
454
455
ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
456
if (!ev_priv)
457
return -ENOMEM;
458
459
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
460
if (!ev) {
461
ret = -ENOMEM;
462
goto exit;
463
}
464
465
if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
466
ret = -EINVAL;
467
goto exit;
468
}
469
470
ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
471
if (ret) {
472
ret = -EFAULT;
473
goto exit;
474
}
475
*priv_data_offset += sizeof(*ev_priv);
476
477
if (ev_priv->user_handle) {
478
ret = kfd_kmap_event_page(p, ev_priv->user_handle);
479
if (ret)
480
goto exit;
481
}
482
483
ev->type = ev_priv->type;
484
ev->auto_reset = ev_priv->auto_reset;
485
ev->signaled = ev_priv->signaled;
486
487
spin_lock_init(&ev->lock);
488
init_waitqueue_head(&ev->wq);
489
490
mutex_lock(&p->event_mutex);
491
switch (ev->type) {
492
case KFD_EVENT_TYPE_SIGNAL:
493
case KFD_EVENT_TYPE_DEBUG:
494
ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
495
break;
496
case KFD_EVENT_TYPE_MEMORY:
497
memcpy(&ev->memory_exception_data,
498
&ev_priv->memory_exception_data,
499
sizeof(struct kfd_hsa_memory_exception_data));
500
501
ret = create_other_event(p, ev, &ev_priv->event_id);
502
break;
503
case KFD_EVENT_TYPE_HW_EXCEPTION:
504
memcpy(&ev->hw_exception_data,
505
&ev_priv->hw_exception_data,
506
sizeof(struct kfd_hsa_hw_exception_data));
507
508
ret = create_other_event(p, ev, &ev_priv->event_id);
509
break;
510
}
511
mutex_unlock(&p->event_mutex);
512
513
exit:
514
if (ret)
515
kfree(ev);
516
517
kfree(ev_priv);
518
519
return ret;
520
}
521
522
int kfd_criu_checkpoint_events(struct kfd_process *p,
523
uint8_t __user *user_priv_data,
524
uint64_t *priv_data_offset)
525
{
526
struct kfd_criu_event_priv_data *ev_privs;
527
int i = 0;
528
int ret = 0;
529
struct kfd_event *ev;
530
uint32_t ev_id;
531
532
uint32_t num_events = kfd_get_num_events(p);
533
534
if (!num_events)
535
return 0;
536
537
ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
538
if (!ev_privs)
539
return -ENOMEM;
540
541
542
idr_for_each_entry(&p->event_idr, ev, ev_id) {
543
struct kfd_criu_event_priv_data *ev_priv;
544
545
/*
546
* Currently, all events have same size of private_data, but the current ioctl's
547
* and CRIU plugin supports private_data of variable sizes
548
*/
549
ev_priv = &ev_privs[i];
550
551
ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
552
553
/* We store the user_handle with the first event */
554
if (i == 0 && p->signal_page)
555
ev_priv->user_handle = p->signal_handle;
556
557
ev_priv->event_id = ev->event_id;
558
ev_priv->auto_reset = ev->auto_reset;
559
ev_priv->type = ev->type;
560
ev_priv->signaled = ev->signaled;
561
562
if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
563
memcpy(&ev_priv->memory_exception_data,
564
&ev->memory_exception_data,
565
sizeof(struct kfd_hsa_memory_exception_data));
566
else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
567
memcpy(&ev_priv->hw_exception_data,
568
&ev->hw_exception_data,
569
sizeof(struct kfd_hsa_hw_exception_data));
570
571
pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
572
i,
573
ev_priv->event_id,
574
ev_priv->auto_reset,
575
ev_priv->type,
576
ev_priv->signaled);
577
i++;
578
}
579
580
ret = copy_to_user(user_priv_data + *priv_data_offset,
581
ev_privs, num_events * sizeof(*ev_privs));
582
if (ret) {
583
pr_err("Failed to copy events priv to user\n");
584
ret = -EFAULT;
585
}
586
587
*priv_data_offset += num_events * sizeof(*ev_privs);
588
589
kvfree(ev_privs);
590
return ret;
591
}
592
593
int kfd_get_num_events(struct kfd_process *p)
594
{
595
struct kfd_event *ev;
596
uint32_t id;
597
u32 num_events = 0;
598
599
idr_for_each_entry(&p->event_idr, ev, id)
600
num_events++;
601
602
return num_events;
603
}
604
605
/* Assumes that p is current. */
606
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
607
{
608
struct kfd_event *ev;
609
int ret = 0;
610
611
mutex_lock(&p->event_mutex);
612
613
ev = lookup_event_by_id(p, event_id);
614
615
if (ev)
616
destroy_event(p, ev);
617
else
618
ret = -EINVAL;
619
620
mutex_unlock(&p->event_mutex);
621
return ret;
622
}
623
624
static void set_event(struct kfd_event *ev)
625
{
626
struct kfd_event_waiter *waiter;
627
628
/* Auto reset if the list is non-empty and we're waking
629
* someone. waitqueue_active is safe here because we're
630
* protected by the ev->lock, which is also held when
631
* updating the wait queues in kfd_wait_on_events.
632
*/
633
ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
634
if (!(++ev->event_age)) {
635
/* Never wrap back to reserved/default event age 0/1 */
636
ev->event_age = 2;
637
WARN_ONCE(1, "event_age wrap back!");
638
}
639
640
list_for_each_entry(waiter, &ev->wq.head, wait.entry)
641
WRITE_ONCE(waiter->activated, true);
642
643
wake_up_all(&ev->wq);
644
}
645
646
/* Assumes that p is current. */
647
int kfd_set_event(struct kfd_process *p, uint32_t event_id)
648
{
649
int ret = 0;
650
struct kfd_event *ev;
651
652
rcu_read_lock();
653
654
ev = lookup_event_by_id(p, event_id);
655
if (!ev) {
656
ret = -EINVAL;
657
goto unlock_rcu;
658
}
659
spin_lock(&ev->lock);
660
661
if (event_can_be_cpu_signaled(ev))
662
set_event(ev);
663
else
664
ret = -EINVAL;
665
666
spin_unlock(&ev->lock);
667
unlock_rcu:
668
rcu_read_unlock();
669
return ret;
670
}
671
672
static void reset_event(struct kfd_event *ev)
673
{
674
ev->signaled = false;
675
}
676
677
/* Assumes that p is current. */
678
int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
679
{
680
int ret = 0;
681
struct kfd_event *ev;
682
683
rcu_read_lock();
684
685
ev = lookup_event_by_id(p, event_id);
686
if (!ev) {
687
ret = -EINVAL;
688
goto unlock_rcu;
689
}
690
spin_lock(&ev->lock);
691
692
if (event_can_be_cpu_signaled(ev))
693
reset_event(ev);
694
else
695
ret = -EINVAL;
696
697
spin_unlock(&ev->lock);
698
unlock_rcu:
699
rcu_read_unlock();
700
return ret;
701
702
}
703
704
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
705
{
706
WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
707
}
708
709
static void set_event_from_interrupt(struct kfd_process *p,
710
struct kfd_event *ev)
711
{
712
if (ev && event_can_be_gpu_signaled(ev)) {
713
acknowledge_signal(p, ev);
714
spin_lock(&ev->lock);
715
set_event(ev);
716
spin_unlock(&ev->lock);
717
}
718
}
719
720
/*
 * Handle an event interrupt for the process identified by @pasid.
 *
 * @partial_id:    (possibly partial) event ID from the interrupt payload
 * @valid_id_bits: number of valid bits in @partial_id; 0 means the
 *                 payload carries no usable ID
 *
 * If the partial ID resolves to a signaled event, only that event is
 * signaled. Otherwise every event whose signal slot is set is signaled.
 */
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
				uint32_t valid_id_bits)
{
	struct kfd_event *ev = NULL;
	uint64_t *slots;
	uint32_t id;

	/*
	 * Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);

	if (!p)
		return; /* Presumably process exited. */

	rcu_read_lock();

	if (valid_id_bits)
		ev = lookup_signaled_event_by_partial_id(p, partial_id,
							 valid_id_bits);
	if (ev) {
		set_event_from_interrupt(p, ev);
		goto out;
	}

	if (!p->signal_page)
		goto out;

	/*
	 * Partial ID lookup failed. Assume that the event ID
	 * in the interrupt payload was invalid and do an
	 * exhaustive search of signaled events.
	 */
	slots = page_slots(p->signal_page);

	if (valid_id_bits)
		pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
				     partial_id, valid_id_bits);

	if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
		/* With relatively few events, it's faster to
		 * iterate over the event IDR
		 */
		idr_for_each_entry(&p->event_idr, ev, id) {
			if (id >= KFD_SIGNAL_EVENT_LIMIT)
				break;

			if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
				set_event_from_interrupt(p, ev);
		}
	} else {
		/* With relatively many events, it's faster to
		 * iterate over the signal slots and lookup
		 * only signaled events from the IDR.
		 */
		for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++) {
			if (READ_ONCE(slots[id]) == UNSIGNALED_EVENT_SLOT)
				continue;

			ev = lookup_event_by_id(p, id);
			set_event_from_interrupt(p, ev);
		}
	}

out:
	rcu_read_unlock();
	kfd_unref_process(p);
}
782
783
/*
 * Allocate a zeroed array of @num_events waiters and initialise the wait
 * queue entry of each one.
 *
 * Returns the array, or NULL if the allocation fails.
 */
static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
{
	struct kfd_event_waiter *waiters;
	uint32_t idx;

	waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
			  GFP_KERNEL);
	if (!waiters)
		return NULL;

	for (idx = 0; idx < num_events; idx++)
		init_wait(&waiters[idx].wait);

	return waiters;
}
798
799
static int init_event_waiter(struct kfd_process *p,
800
struct kfd_event_waiter *waiter,
801
struct kfd_event_data *event_data)
802
{
803
struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id);
804
805
if (!ev)
806
return -EINVAL;
807
808
spin_lock(&ev->lock);
809
waiter->event = ev;
810
waiter->activated = ev->signaled;
811
ev->signaled = ev->signaled && !ev->auto_reset;
812
813
/* last_event_age = 0 reserved for backward compatible */
814
if (waiter->event->type == KFD_EVENT_TYPE_SIGNAL &&
815
event_data->signal_event_data.last_event_age) {
816
waiter->event_age_enabled = true;
817
if (ev->event_age != event_data->signal_event_data.last_event_age)
818
waiter->activated = true;
819
}
820
821
if (!waiter->activated)
822
add_wait_queue(&ev->wq, &waiter->wait);
823
spin_unlock(&ev->lock);
824
825
return 0;
826
}
827
828
/* test_event_condition - Test condition of events being waited for
829
* @all: Return completion only if all events have signaled
830
* @num_events: Number of events to wait for
831
* @event_waiters: Array of event waiters, one per event
832
*
833
* Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
834
* signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
835
* events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
836
* the events have been destroyed.
837
*/
838
static uint32_t test_event_condition(bool all, uint32_t num_events,
839
struct kfd_event_waiter *event_waiters)
840
{
841
uint32_t i;
842
uint32_t activated_count = 0;
843
844
for (i = 0; i < num_events; i++) {
845
if (!READ_ONCE(event_waiters[i].event))
846
return KFD_IOC_WAIT_RESULT_FAIL;
847
848
if (READ_ONCE(event_waiters[i].activated)) {
849
if (!all)
850
return KFD_IOC_WAIT_RESULT_COMPLETE;
851
852
activated_count++;
853
}
854
}
855
856
return activated_count == num_events ?
857
KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
858
}
859
860
/*
861
* Copy event specific data, if defined.
862
* Currently only memory exception events have additional data to copy to user
863
*/
864
static int copy_signaled_event_data(uint32_t num_events,
865
struct kfd_event_waiter *event_waiters,
866
struct kfd_event_data __user *data)
867
{
868
void *src;
869
void __user *dst;
870
struct kfd_event_waiter *waiter;
871
struct kfd_event *event;
872
uint32_t i, size = 0;
873
874
for (i = 0; i < num_events; i++) {
875
waiter = &event_waiters[i];
876
event = waiter->event;
877
if (!event)
878
return -EINVAL; /* event was destroyed */
879
if (waiter->activated) {
880
if (event->type == KFD_EVENT_TYPE_MEMORY) {
881
dst = &data[i].memory_exception_data;
882
src = &event->memory_exception_data;
883
size = sizeof(struct kfd_hsa_memory_exception_data);
884
} else if (event->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
885
dst = &data[i].memory_exception_data;
886
src = &event->hw_exception_data;
887
size = sizeof(struct kfd_hsa_hw_exception_data);
888
} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
889
waiter->event_age_enabled) {
890
dst = &data[i].signal_event_data.last_event_age;
891
src = &event->event_age;
892
size = sizeof(u64);
893
}
894
if (size && copy_to_user(dst, src, size))
895
return -EFAULT;
896
}
897
}
898
899
return 0;
900
}
901
902
/*
 * Convert a user supplied timeout in milliseconds to jiffies.
 * KFD_EVENT_TIMEOUT_IMMEDIATE maps to 0 and KFD_EVENT_TIMEOUT_INFINITE to
 * MAX_SCHEDULE_TIMEOUT; any other value is clamped to 2^31-1 ms and
 * converted, plus one jiffy.
 */
static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
{
	uint32_t clamped_ms;

	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
		return 0;

	if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
		return MAX_SCHEDULE_TIMEOUT;

	/*
	 * msecs_to_jiffies interprets all values above 2^31-1 as infinite,
	 * but we consider them finite.
	 * This hack is wrong, but nobody is likely to notice.
	 */
	clamped_ms = user_timeout_ms;
	if (clamped_ms > 0x7FFFFFFF)
		clamped_ms = 0x7FFFFFFF;

	return msecs_to_jiffies(clamped_ms) + 1;
}
919
920
static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
921
bool undo_auto_reset)
922
{
923
uint32_t i;
924
925
for (i = 0; i < num_events; i++)
926
if (waiters[i].event) {
927
spin_lock(&waiters[i].event->lock);
928
remove_wait_queue(&waiters[i].event->wq,
929
&waiters[i].wait);
930
if (undo_auto_reset && waiters[i].activated &&
931
waiters[i].event && waiters[i].event->auto_reset)
932
set_event(waiters[i].event);
933
spin_unlock(&waiters[i].event->lock);
934
}
935
936
kfree(waiters);
937
}
938
939
int kfd_wait_on_events(struct kfd_process *p,
940
uint32_t num_events, void __user *data,
941
bool all, uint32_t *user_timeout_ms,
942
uint32_t *wait_result)
943
{
944
struct kfd_event_data __user *events =
945
(struct kfd_event_data __user *) data;
946
uint32_t i;
947
int ret = 0;
948
949
struct kfd_event_waiter *event_waiters = NULL;
950
long timeout = user_timeout_to_jiffies(*user_timeout_ms);
951
952
event_waiters = alloc_event_waiters(num_events);
953
if (!event_waiters) {
954
ret = -ENOMEM;
955
goto out;
956
}
957
958
/* Use p->event_mutex here to protect against concurrent creation and
959
* destruction of events while we initialize event_waiters.
960
*/
961
mutex_lock(&p->event_mutex);
962
963
for (i = 0; i < num_events; i++) {
964
struct kfd_event_data event_data;
965
966
if (copy_from_user(&event_data, &events[i],
967
sizeof(struct kfd_event_data))) {
968
ret = -EFAULT;
969
goto out_unlock;
970
}
971
972
ret = init_event_waiter(p, &event_waiters[i], &event_data);
973
if (ret)
974
goto out_unlock;
975
}
976
977
/* Check condition once. */
978
*wait_result = test_event_condition(all, num_events, event_waiters);
979
if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
980
ret = copy_signaled_event_data(num_events,
981
event_waiters, events);
982
goto out_unlock;
983
} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
984
/* This should not happen. Events shouldn't be
985
* destroyed while we're holding the event_mutex
986
*/
987
goto out_unlock;
988
}
989
990
mutex_unlock(&p->event_mutex);
991
992
while (true) {
993
if (fatal_signal_pending(current)) {
994
ret = -EINTR;
995
break;
996
}
997
998
if (signal_pending(current)) {
999
ret = -ERESTARTSYS;
1000
if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
1001
*user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
1002
*user_timeout_ms = jiffies_to_msecs(
1003
max(0l, timeout-1));
1004
break;
1005
}
1006
1007
/* Set task state to interruptible sleep before
1008
* checking wake-up conditions. A concurrent wake-up
1009
* will put the task back into runnable state. In that
1010
* case schedule_timeout will not put the task to
1011
* sleep and we'll get a chance to re-check the
1012
* updated conditions almost immediately. Otherwise,
1013
* this race condition would lead to a soft hang or a
1014
* very long sleep.
1015
*/
1016
set_current_state(TASK_INTERRUPTIBLE);
1017
1018
*wait_result = test_event_condition(all, num_events,
1019
event_waiters);
1020
if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
1021
break;
1022
1023
if (timeout <= 0)
1024
break;
1025
1026
timeout = schedule_timeout(timeout);
1027
}
1028
__set_current_state(TASK_RUNNING);
1029
1030
mutex_lock(&p->event_mutex);
1031
/* copy_signaled_event_data may sleep. So this has to happen
1032
* after the task state is set back to RUNNING.
1033
*
1034
* The event may also have been destroyed after signaling. So
1035
* copy_signaled_event_data also must confirm that the event
1036
* still exists. Therefore this must be under the p->event_mutex
1037
* which is also held when events are destroyed.
1038
*/
1039
if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
1040
ret = copy_signaled_event_data(num_events,
1041
event_waiters, events);
1042
1043
out_unlock:
1044
free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
1045
mutex_unlock(&p->event_mutex);
1046
out:
1047
if (ret)
1048
*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
1049
else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
1050
ret = -EIO;
1051
1052
return ret;
1053
}
1054
1055
int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
1056
{
1057
unsigned long pfn;
1058
struct kfd_signal_page *page;
1059
int ret;
1060
1061
/* check required size doesn't exceed the allocated size */
1062
if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
1063
get_order(vma->vm_end - vma->vm_start)) {
1064
pr_err("Event page mmap requested illegal size\n");
1065
return -EINVAL;
1066
}
1067
1068
page = p->signal_page;
1069
if (!page) {
1070
/* Probably KFD bug, but mmap is user-accessible. */
1071
pr_debug("Signal page could not be found\n");
1072
return -EINVAL;
1073
}
1074
1075
pfn = __pa(page->kernel_address);
1076
pfn >>= PAGE_SHIFT;
1077
1078
vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
1079
| VM_DONTDUMP | VM_PFNMAP);
1080
1081
pr_debug("Mapping signal page\n");
1082
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
1083
pr_debug(" end user address == 0x%08lx\n", vma->vm_end);
1084
pr_debug(" pfn == 0x%016lX\n", pfn);
1085
pr_debug(" vm_flags == 0x%08lX\n", vma->vm_flags);
1086
pr_debug(" size == 0x%08lX\n",
1087
vma->vm_end - vma->vm_start);
1088
1089
page->user_address = (uint64_t __user *)vma->vm_start;
1090
1091
/* mapping the page to user process */
1092
ret = remap_pfn_range(vma, vma->vm_start, pfn,
1093
vma->vm_end - vma->vm_start, vma->vm_page_prot);
1094
if (!ret)
1095
p->signal_mapped_size = vma->vm_end - vma->vm_start;
1096
1097
return ret;
1098
}
1099
1100
/*
1101
* Assumes that p is not going away.
1102
*/
1103
static void lookup_events_by_type_and_signal(struct kfd_process *p,
1104
int type, void *event_data)
1105
{
1106
struct kfd_hsa_memory_exception_data *ev_data;
1107
struct kfd_event *ev;
1108
uint32_t id;
1109
bool send_signal = true;
1110
1111
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
1112
1113
rcu_read_lock();
1114
1115
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1116
idr_for_each_entry_continue(&p->event_idr, ev, id)
1117
if (ev->type == type) {
1118
send_signal = false;
1119
dev_dbg(kfd_device,
1120
"Event found: id %X type %d",
1121
ev->event_id, ev->type);
1122
spin_lock(&ev->lock);
1123
set_event(ev);
1124
if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
1125
ev->memory_exception_data = *ev_data;
1126
spin_unlock(&ev->lock);
1127
}
1128
1129
if (type == KFD_EVENT_TYPE_MEMORY) {
1130
dev_warn(kfd_device,
1131
"Sending SIGSEGV to process pid %d",
1132
p->lead_thread->pid);
1133
send_sig(SIGSEGV, p->lead_thread, 0);
1134
}
1135
1136
/* Send SIGTERM no event of type "type" has been found*/
1137
if (send_signal) {
1138
if (send_sigterm) {
1139
dev_warn(kfd_device,
1140
"Sending SIGTERM to process pid %d",
1141
p->lead_thread->pid);
1142
send_sig(SIGTERM, p->lead_thread, 0);
1143
} else {
1144
dev_err(kfd_device,
1145
"Process pid %d got unhandled exception",
1146
p->lead_thread->pid);
1147
}
1148
}
1149
1150
rcu_read_unlock();
1151
}
1152
1153
/*
 * Signal all KFD_EVENT_TYPE_HW_EXCEPTION events of the process identified by
 * @pasid. Does nothing when no such process is found.
 */
void kfd_signal_hw_exception_event(u32 pasid)
{
	struct kfd_process *proc;

	/*
	 * We run in arbitrary (workqueue) context rather than process
	 * context, so the kfd_process could try to exit underneath us. The
	 * lookup therefore takes a reference on the process, which is
	 * dropped again below.
	 */
	proc = kfd_lookup_process_by_pasid(pasid, NULL);
	if (proc) {
		lookup_events_by_type_and_signal(proc,
						 KFD_EVENT_TYPE_HW_EXCEPTION,
						 NULL);
		kfd_unref_process(proc);
	}
	/* Otherwise: presumably the process exited. */
}
1168
1169
/*
 * Report a "not present" memory fault at @gpu_va on every process device of
 * @p. For each device, kfd_evict_process_device() is called first and the
 * fault is then signaled through kfd_signal_vm_fault_event().
 */
void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
{
	struct kfd_hsa_memory_exception_data fault;
	struct kfd_process_device *cur;
	int idx;

	/* Zero the whole record, then fill in the fields common to all devices. */
	memset(&fault, 0, sizeof(fault));
	fault.failure.NotPresent = 1;
	fault.va = gpu_va;

	/* Send the VM fault to every kfd process device. */
	for (idx = 0; idx < p->n_pdds; idx++) {
		cur = p->pdds[idx];

		/* Only the GPU id differs from one device to the next. */
		fault.gpu_id = cur->user_gpu_id;

		kfd_evict_process_device(cur);
		kfd_signal_vm_fault_event(cur, NULL, &fault);
	}
}
1187
1188
void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
1189
struct kfd_vm_fault_info *info,
1190
struct kfd_hsa_memory_exception_data *data)
1191
{
1192
struct kfd_event *ev;
1193
uint32_t id;
1194
struct kfd_process *p = pdd->process;
1195
struct kfd_hsa_memory_exception_data memory_exception_data;
1196
int user_gpu_id;
1197
1198
user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
1199
if (unlikely(user_gpu_id == -EINVAL)) {
1200
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
1201
pdd->dev->id);
1202
return;
1203
}
1204
1205
/* SoC15 chips and onwards will pass in data from now on. */
1206
if (!data) {
1207
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
1208
memory_exception_data.gpu_id = user_gpu_id;
1209
memory_exception_data.failure.imprecise = true;
1210
1211
/* Set failure reason */
1212
if (info) {
1213
memory_exception_data.va = (info->page_addr) <<
1214
PAGE_SHIFT;
1215
memory_exception_data.failure.NotPresent =
1216
info->prot_valid ? 1 : 0;
1217
memory_exception_data.failure.NoExecute =
1218
info->prot_exec ? 1 : 0;
1219
memory_exception_data.failure.ReadOnly =
1220
info->prot_write ? 1 : 0;
1221
memory_exception_data.failure.imprecise = 0;
1222
}
1223
}
1224
1225
rcu_read_lock();
1226
1227
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1228
idr_for_each_entry_continue(&p->event_idr, ev, id)
1229
if (ev->type == KFD_EVENT_TYPE_MEMORY) {
1230
spin_lock(&ev->lock);
1231
ev->memory_exception_data = data ? *data :
1232
memory_exception_data;
1233
set_event(ev);
1234
spin_unlock(&ev->lock);
1235
}
1236
1237
rcu_read_unlock();
1238
}
1239
1240
void kfd_signal_reset_event(struct kfd_node *dev)
1241
{
1242
struct kfd_hsa_hw_exception_data hw_exception_data;
1243
struct kfd_hsa_memory_exception_data memory_exception_data;
1244
struct kfd_process *p;
1245
struct kfd_event *ev;
1246
unsigned int temp;
1247
uint32_t id, idx;
1248
int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
1249
KFD_HW_EXCEPTION_ECC :
1250
KFD_HW_EXCEPTION_GPU_HANG;
1251
1252
/* Whole gpu reset caused by GPU hang and memory is lost */
1253
memset(&hw_exception_data, 0, sizeof(hw_exception_data));
1254
hw_exception_data.memory_lost = 1;
1255
hw_exception_data.reset_cause = reset_cause;
1256
1257
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
1258
memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
1259
memory_exception_data.failure.imprecise = true;
1260
1261
idx = srcu_read_lock(&kfd_processes_srcu);
1262
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
1263
int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
1264
struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p);
1265
1266
if (unlikely(user_gpu_id == -EINVAL)) {
1267
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
1268
continue;
1269
}
1270
1271
if (unlikely(!pdd)) {
1272
WARN_ONCE(1, "Could not get device data from process pid:%d\n",
1273
p->lead_thread->pid);
1274
continue;
1275
}
1276
1277
if (dev->dqm->detect_hang_count && !pdd->has_reset_queue)
1278
continue;
1279
1280
if (dev->dqm->detect_hang_count) {
1281
struct amdgpu_task_info *ti;
1282
struct amdgpu_fpriv *drv_priv;
1283
1284
if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
1285
WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
1286
dev->id, p->lead_thread->pid);
1287
continue;
1288
}
1289
1290
ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
1291
if (ti) {
1292
dev_err(dev->adev->dev,
1293
"Queues reset on process %s tid %d thread %s pid %d\n",
1294
ti->process_name, ti->tgid, ti->task.comm, ti->task.pid);
1295
amdgpu_vm_put_task_info(ti);
1296
}
1297
}
1298
1299
rcu_read_lock();
1300
1301
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
1302
idr_for_each_entry_continue(&p->event_idr, ev, id) {
1303
if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
1304
spin_lock(&ev->lock);
1305
ev->hw_exception_data = hw_exception_data;
1306
ev->hw_exception_data.gpu_id = user_gpu_id;
1307
set_event(ev);
1308
spin_unlock(&ev->lock);
1309
}
1310
if (ev->type == KFD_EVENT_TYPE_MEMORY &&
1311
reset_cause == KFD_HW_EXCEPTION_ECC) {
1312
spin_lock(&ev->lock);
1313
ev->memory_exception_data = memory_exception_data;
1314
ev->memory_exception_data.gpu_id = user_gpu_id;
1315
set_event(ev);
1316
spin_unlock(&ev->lock);
1317
}
1318
}
1319
1320
rcu_read_unlock();
1321
}
1322
srcu_read_unlock(&kfd_processes_srcu, idx);
1323
}
1324
1325
/*
 * Report consumption of poisoned memory on @dev to the process identified by
 * @pasid.
 *
 * Every KFD_EVENT_TYPE_HW_EXCEPTION event of the process is signaled with a
 * "memory lost" / KFD_HW_EXCEPTION_ECC record, every KFD_EVENT_TYPE_MEMORY
 * event with an imprecise KFD_MEM_ERR_POISON_CONSUMED record, and SIGBUS is
 * then sent to the process's lead thread.
 *
 * Returns early if the process cannot be found or its user GPU id cannot be
 * resolved. The process looked up here is released with kfd_unref_process()
 * on every path that found one.
 */
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
	struct kfd_hsa_memory_exception_data memory_exception_data;
	struct kfd_hsa_hw_exception_data hw_exception_data;
	struct kfd_event *ev;
	uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	int user_gpu_id;

	if (!p) {
		/* pasid is a u32, so it is printed with %u. */
		dev_warn(dev->adev->dev, "Not find process with pasid:%u\n", pasid);
		return; /* Presumably process exited. */
	}

	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
	if (unlikely(user_gpu_id == -EINVAL)) {
		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
		kfd_unref_process(p);
		return;
	}

	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
	hw_exception_data.gpu_id = user_gpu_id;
	hw_exception_data.memory_lost = 1;
	hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;

	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
	memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
	memory_exception_data.gpu_id = user_gpu_id;
	memory_exception_data.failure.imprecise = true;

	rcu_read_lock();

	idr_for_each_entry_continue(&p->event_idr, ev, id) {
		if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
			spin_lock(&ev->lock);
			ev->hw_exception_data = hw_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}

		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
			spin_lock(&ev->lock);
			ev->memory_exception_data = memory_exception_data;
			set_event(ev);
			spin_unlock(&ev->lock);
		}
	}

	dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%u)\n",
		 p->lead_thread->comm, pasid);
	rcu_read_unlock();

	/* user application will handle SIGBUS signal */
	send_sig(SIGBUS, p->lead_thread, 0);

	kfd_unref_process(p);
}
1383
1384