Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/habanalabs/common/command_submission.c
26436 views
1
// SPDX-License-Identifier: GPL-2.0
2
3
/*
4
* Copyright 2016-2021 HabanaLabs, Ltd.
5
* All Rights Reserved.
6
*/
7
8
#include <uapi/drm/habanalabs_accel.h>
9
#include "habanalabs.h"
10
11
#include <linux/uaccess.h>
12
#include <linux/slab.h>
13
14
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
15
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
16
HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
17
HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
18
19
20
#define MAX_TS_ITER_NUM 100
21
22
/**
23
* enum hl_cs_wait_status - cs wait status
24
* @CS_WAIT_STATUS_BUSY: cs was not completed yet
25
* @CS_WAIT_STATUS_COMPLETED: cs completed
26
* @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
27
*/
28
enum hl_cs_wait_status {
29
CS_WAIT_STATUS_BUSY,
30
CS_WAIT_STATUS_COMPLETED,
31
CS_WAIT_STATUS_GONE
32
};
33
34
/*
35
* Data used while handling wait/timestamp nodes.
36
* The purpose of this struct is to store the needed data for both operations
37
* in one variable instead of passing large number of arguments to functions.
38
*/
39
struct wait_interrupt_data {
40
struct hl_user_interrupt *interrupt;
41
struct hl_mmap_mem_buf *buf;
42
struct hl_mem_mgr *mmg;
43
struct hl_cb *cq_cb;
44
u64 ts_handle;
45
u64 ts_offset;
46
u64 cq_handle;
47
u64 cq_offset;
48
u64 target_value;
49
u64 intr_timeout_us;
50
};
51
52
static void job_wq_completion(struct work_struct *work);
53
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
54
enum hl_cs_wait_status *status, s64 *timestamp);
55
static void cs_do_release(struct kref *ref);
56
57
static void hl_push_cs_outcome(struct hl_device *hdev,
58
struct hl_cs_outcome_store *outcome_store,
59
u64 seq, ktime_t ts, int error)
60
{
61
struct hl_cs_outcome *node;
62
unsigned long flags;
63
64
/*
65
* CS outcome store supports the following operations:
66
* push outcome - store a recent CS outcome in the store
67
* pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
68
* It uses 2 lists: used list and free list.
69
* It has a pre-allocated amount of nodes, each node stores
70
* a single CS outcome.
71
* Initially, all the nodes are in the free list.
72
* On push outcome, a node (any) is taken from the free list, its
73
* information is filled in, and the node is moved to the used list.
74
* It is possible, that there are no nodes left in the free list.
75
* In this case, we will lose some information about old outcomes. We
76
* will pop the OLDEST node from the used list, and make it free.
77
* On pop, the node is searched for in the used list (using a search
78
* index).
79
* If found, the node is then removed from the used list, and moved
80
* back to the free list. The outcome data that the node contained is
81
* returned back to the user.
82
*/
83
84
spin_lock_irqsave(&outcome_store->db_lock, flags);
85
86
if (list_empty(&outcome_store->free_list)) {
87
node = list_last_entry(&outcome_store->used_list,
88
struct hl_cs_outcome, list_link);
89
hash_del(&node->map_link);
90
dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
91
} else {
92
node = list_last_entry(&outcome_store->free_list,
93
struct hl_cs_outcome, list_link);
94
}
95
96
list_del_init(&node->list_link);
97
98
node->seq = seq;
99
node->ts = ts;
100
node->error = error;
101
102
list_add(&node->list_link, &outcome_store->used_list);
103
hash_add(outcome_store->outcome_map, &node->map_link, node->seq);
104
105
spin_unlock_irqrestore(&outcome_store->db_lock, flags);
106
}
107
108
static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
109
u64 seq, ktime_t *ts, int *error)
110
{
111
struct hl_cs_outcome *node;
112
unsigned long flags;
113
114
spin_lock_irqsave(&outcome_store->db_lock, flags);
115
116
hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
117
if (node->seq == seq) {
118
*ts = node->ts;
119
*error = node->error;
120
121
hash_del(&node->map_link);
122
list_del_init(&node->list_link);
123
list_add(&node->list_link, &outcome_store->free_list);
124
125
spin_unlock_irqrestore(&outcome_store->db_lock, flags);
126
127
return true;
128
}
129
130
spin_unlock_irqrestore(&outcome_store->db_lock, flags);
131
132
return false;
133
}
134
135
static void hl_sob_reset(struct kref *ref)
136
{
137
struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
138
kref);
139
struct hl_device *hdev = hw_sob->hdev;
140
141
dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
142
143
hdev->asic_funcs->reset_sob(hdev, hw_sob);
144
145
hw_sob->need_reset = false;
146
}
147
148
void hl_sob_reset_error(struct kref *ref)
149
{
150
struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
151
kref);
152
struct hl_device *hdev = hw_sob->hdev;
153
154
dev_crit(hdev->dev,
155
"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
156
hw_sob->q_idx, hw_sob->sob_id);
157
}
158
159
void hw_sob_put(struct hl_hw_sob *hw_sob)
160
{
161
if (hw_sob)
162
kref_put(&hw_sob->kref, hl_sob_reset);
163
}
164
165
static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
166
{
167
if (hw_sob)
168
kref_put(&hw_sob->kref, hl_sob_reset_error);
169
}
170
171
void hw_sob_get(struct hl_hw_sob *hw_sob)
172
{
173
if (hw_sob)
174
kref_get(&hw_sob->kref);
175
}
176
177
/**
178
* hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
179
* @sob_base: sob base id
180
* @sob_mask: sob user mask, each bit represents a sob offset from sob base
181
* @mask: generated mask
182
*
183
* Return: 0 if given parameters are valid
184
*/
185
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
186
{
187
int i;
188
189
if (sob_mask == 0)
190
return -EINVAL;
191
192
if (sob_mask == 0x1) {
193
*mask = ~(1 << (sob_base & 0x7));
194
} else {
195
/* find msb in order to verify sob range is valid */
196
for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--)
197
if (BIT(i) & sob_mask)
198
break;
199
200
if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
201
return -EINVAL;
202
203
*mask = ~sob_mask;
204
}
205
206
return 0;
207
}
208
209
static void hl_fence_release(struct kref *kref)
210
{
211
struct hl_fence *fence =
212
container_of(kref, struct hl_fence, refcount);
213
struct hl_cs_compl *hl_cs_cmpl =
214
container_of(fence, struct hl_cs_compl, base_fence);
215
216
kfree(hl_cs_cmpl);
217
}
218
219
void hl_fence_put(struct hl_fence *fence)
220
{
221
if (IS_ERR_OR_NULL(fence))
222
return;
223
kref_put(&fence->refcount, hl_fence_release);
224
}
225
226
void hl_fences_put(struct hl_fence **fence, int len)
227
{
228
int i;
229
230
for (i = 0; i < len; i++, fence++)
231
hl_fence_put(*fence);
232
}
233
234
void hl_fence_get(struct hl_fence *fence)
235
{
236
if (fence)
237
kref_get(&fence->refcount);
238
}
239
240
static void hl_fence_init(struct hl_fence *fence, u64 sequence)
241
{
242
kref_init(&fence->refcount);
243
fence->cs_sequence = sequence;
244
fence->error = 0;
245
fence->timestamp = ktime_set(0, 0);
246
fence->mcs_handling_done = false;
247
init_completion(&fence->completion);
248
}
249
250
void cs_get(struct hl_cs *cs)
251
{
252
kref_get(&cs->refcount);
253
}
254
255
static int cs_get_unless_zero(struct hl_cs *cs)
256
{
257
return kref_get_unless_zero(&cs->refcount);
258
}
259
260
static void cs_put(struct hl_cs *cs)
261
{
262
kref_put(&cs->refcount, cs_do_release);
263
}
264
265
static void cs_job_do_release(struct kref *ref)
266
{
267
struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount);
268
269
kfree(job);
270
}
271
272
static void hl_cs_job_put(struct hl_cs_job *job)
273
{
274
kref_put(&job->refcount, cs_job_do_release);
275
}
276
277
bool cs_needs_completion(struct hl_cs *cs)
278
{
279
/* In case this is a staged CS, only the last CS in sequence should
280
* get a completion, any non staged CS will always get a completion
281
*/
282
if (cs->staged_cs && !cs->staged_last)
283
return false;
284
285
return true;
286
}
287
288
bool cs_needs_timeout(struct hl_cs *cs)
289
{
290
/* In case this is a staged CS, only the first CS in sequence should
291
* get a timeout, any non staged CS will always get a timeout
292
*/
293
if (cs->staged_cs && !cs->staged_first)
294
return false;
295
296
return true;
297
}
298
299
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
300
{
301
/* Patched CB is created for external queues jobs */
302
return (job->queue_type == QUEUE_TYPE_EXT);
303
}
304
305
/*
306
* cs_parser - parse the user command submission
307
*
308
* @hpriv : pointer to the private data of the fd
309
* @job : pointer to the job that holds the command submission info
310
*
311
* The function parses the command submission of the user. It calls the
312
* ASIC specific parser, which returns a list of memory blocks to send
313
* to the device as different command buffers
314
*
315
*/
316
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
317
{
318
struct hl_device *hdev = hpriv->hdev;
319
struct hl_cs_parser parser;
320
int rc;
321
322
parser.ctx_id = job->cs->ctx->asid;
323
parser.cs_sequence = job->cs->sequence;
324
parser.job_id = job->id;
325
326
parser.hw_queue_id = job->hw_queue_id;
327
parser.job_userptr_list = &job->userptr_list;
328
parser.patched_cb = NULL;
329
parser.user_cb = job->user_cb;
330
parser.user_cb_size = job->user_cb_size;
331
parser.queue_type = job->queue_type;
332
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
333
job->patched_cb = NULL;
334
parser.completion = cs_needs_completion(job->cs);
335
336
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
337
338
if (is_cb_patched(hdev, job)) {
339
if (!rc) {
340
job->patched_cb = parser.patched_cb;
341
job->job_cb_size = parser.patched_cb_size;
342
job->contains_dma_pkt = parser.contains_dma_pkt;
343
atomic_inc(&job->patched_cb->cs_cnt);
344
}
345
346
/*
347
* Whether the parsing worked or not, we don't need the
348
* original CB anymore because it was already parsed and
349
* won't be accessed again for this CS
350
*/
351
atomic_dec(&job->user_cb->cs_cnt);
352
hl_cb_put(job->user_cb);
353
job->user_cb = NULL;
354
} else if (!rc) {
355
job->job_cb_size = job->user_cb_size;
356
}
357
358
return rc;
359
}
360
361
static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
362
{
363
struct hl_cs *cs = job->cs;
364
365
if (is_cb_patched(hdev, job)) {
366
hl_userptr_delete_list(hdev, &job->userptr_list);
367
368
/*
369
* We might arrive here from rollback and patched CB wasn't
370
* created, so we need to check it's not NULL
371
*/
372
if (job->patched_cb) {
373
atomic_dec(&job->patched_cb->cs_cnt);
374
hl_cb_put(job->patched_cb);
375
}
376
}
377
378
/* For H/W queue jobs, if a user CB was allocated by driver,
379
* the user CB isn't released in cs_parser() and thus should be
380
* released here. This is also true for INT queues jobs which were
381
* allocated by driver.
382
*/
383
if (job->is_kernel_allocated_cb &&
384
(job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
385
atomic_dec(&job->user_cb->cs_cnt);
386
hl_cb_put(job->user_cb);
387
}
388
389
/*
390
* This is the only place where there can be multiple threads
391
* modifying the list at the same time
392
*/
393
spin_lock(&cs->job_lock);
394
list_del(&job->cs_node);
395
spin_unlock(&cs->job_lock);
396
397
hl_debugfs_remove_job(hdev, job);
398
399
/* We decrement reference only for a CS that gets completion
400
* because the reference was incremented only for this kind of CS
401
* right before it was scheduled.
402
*
403
* In staged submission, only the last CS marked as 'staged_last'
404
* gets completion, hence its release function will be called from here.
405
* As for all the rest CS's in the staged submission which do not get
406
* completion, their CS reference will be decremented by the
407
* 'staged_last' CS during the CS release flow.
408
* All relevant PQ CI counters will be incremented during the CS release
409
* flow by calling 'hl_hw_queue_update_ci'.
410
*/
411
if (cs_needs_completion(cs) &&
412
(job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
413
414
/* In CS based completions, the timestamp is already available,
415
* so no need to extract it from job
416
*/
417
if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
418
cs->completion_timestamp = job->timestamp;
419
420
cs_put(cs);
421
}
422
423
hl_cs_job_put(job);
424
}
425
426
/*
427
* hl_staged_cs_find_first - locate the first CS in this staged submission
428
*
429
* @hdev: pointer to device structure
430
* @cs_seq: staged submission sequence number
431
*
432
* @note: This function must be called under 'hdev->cs_mirror_lock'
433
*
434
* Find and return a CS pointer with the given sequence
435
*/
436
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
437
{
438
struct hl_cs *cs;
439
440
list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
441
if (cs->staged_cs && cs->staged_first &&
442
cs->sequence == cs_seq)
443
return cs;
444
445
return NULL;
446
}
447
448
/*
449
* is_staged_cs_last_exists - returns true if the last CS in sequence exists
450
*
451
* @hdev: pointer to device structure
452
* @cs: staged submission member
453
*
454
*/
455
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
456
{
457
struct hl_cs *last_entry;
458
459
last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
460
staged_cs_node);
461
462
if (last_entry->staged_last)
463
return true;
464
465
return false;
466
}
467
468
/*
469
* staged_cs_get - get CS reference if this CS is a part of a staged CS
470
*
471
* @hdev: pointer to device structure
472
* @cs: current CS
473
* @cs_seq: staged submission sequence number
474
*
475
* Increment CS reference for every CS in this staged submission except for
476
* the CS which get completion.
477
*/
478
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
479
{
480
/* Only the last CS in this staged submission will get a completion.
481
* We must increment the reference for all other CS's in this
482
* staged submission.
483
* Once we get a completion we will release the whole staged submission.
484
*/
485
if (!cs->staged_last)
486
cs_get(cs);
487
}
488
489
/*
490
* staged_cs_put - put a CS in case it is part of staged submission
491
*
492
* @hdev: pointer to device structure
493
* @cs: CS to put
494
*
495
* This function decrements a CS reference (for a non completion CS)
496
*/
497
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
498
{
499
/* We release all CS's in a staged submission except the last
500
* CS which we have never incremented its reference.
501
*/
502
if (!cs_needs_completion(cs))
503
cs_put(cs);
504
}
505
506
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
507
{
508
struct hl_cs *next = NULL, *iter, *first_cs;
509
510
if (!cs_needs_timeout(cs))
511
return;
512
513
spin_lock(&hdev->cs_mirror_lock);
514
515
/* We need to handle tdr only once for the complete staged submission.
516
* Hence, we choose the CS that reaches this function first which is
517
* the CS marked as 'staged_last'.
518
* In case single staged cs was submitted which has both first and last
519
* indications, then "cs_find_first" below will return NULL, since we
520
* removed the cs node from the list before getting here,
521
* in such cases just continue with the cs to cancel it's TDR work.
522
*/
523
if (cs->staged_cs && cs->staged_last) {
524
first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
525
if (first_cs)
526
cs = first_cs;
527
}
528
529
spin_unlock(&hdev->cs_mirror_lock);
530
531
/* Don't cancel TDR in case this CS was timedout because we might be
532
* running from the TDR context
533
*/
534
if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
535
return;
536
537
if (cs->tdr_active)
538
cancel_delayed_work_sync(&cs->work_tdr);
539
540
spin_lock(&hdev->cs_mirror_lock);
541
542
/* queue TDR for next CS */
543
list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
544
if (cs_needs_timeout(iter)) {
545
next = iter;
546
break;
547
}
548
549
if (next && !next->tdr_active) {
550
next->tdr_active = true;
551
schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
552
}
553
554
spin_unlock(&hdev->cs_mirror_lock);
555
}
556
557
/*
558
* force_complete_multi_cs - complete all contexts that wait on multi-CS
559
*
560
* @hdev: pointer to habanalabs device structure
561
*/
562
static void force_complete_multi_cs(struct hl_device *hdev)
563
{
564
int i;
565
566
for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
567
struct multi_cs_completion *mcs_compl;
568
569
mcs_compl = &hdev->multi_cs_completion[i];
570
571
spin_lock(&mcs_compl->lock);
572
573
if (!mcs_compl->used) {
574
spin_unlock(&mcs_compl->lock);
575
continue;
576
}
577
578
/* when calling force complete no context should be waiting on
579
* multi-cS.
580
* We are calling the function as a protection for such case
581
* to free any pending context and print error message
582
*/
583
dev_err(hdev->dev,
584
"multi-CS completion context %d still waiting when calling force completion\n",
585
i);
586
complete_all(&mcs_compl->completion);
587
spin_unlock(&mcs_compl->lock);
588
}
589
}
590
591
/*
592
* complete_multi_cs - complete all waiting entities on multi-CS
593
*
594
* @hdev: pointer to habanalabs device structure
595
* @cs: CS structure
596
* The function signals a waiting entity that has an overlapping stream masters
597
* with the completed CS.
598
* For example:
599
* - a completed CS worked on stream master QID 4, multi CS completion
600
* is actively waiting on stream master QIDs 3, 5. don't send signal as no
601
* common stream master QID
602
* - a completed CS worked on stream master QID 4, multi CS completion
603
* is actively waiting on stream master QIDs 3, 4. send signal as stream
604
* master QID 4 is common
605
*/
606
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
607
{
608
struct hl_fence *fence = cs->fence;
609
int i;
610
611
/* in case of multi CS check for completion only for the first CS */
612
if (cs->staged_cs && !cs->staged_first)
613
return;
614
615
for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
616
struct multi_cs_completion *mcs_compl;
617
618
mcs_compl = &hdev->multi_cs_completion[i];
619
if (!mcs_compl->used)
620
continue;
621
622
spin_lock(&mcs_compl->lock);
623
624
/*
625
* complete if:
626
* 1. still waiting for completion
627
* 2. the completed CS has at least one overlapping stream
628
* master with the stream masters in the completion
629
*/
630
if (mcs_compl->used &&
631
(fence->stream_master_qid_map &
632
mcs_compl->stream_master_qid_map)) {
633
/* extract the timestamp only of first completed CS */
634
if (!mcs_compl->timestamp)
635
mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
636
637
complete_all(&mcs_compl->completion);
638
639
/*
640
* Setting mcs_handling_done inside the lock ensures
641
* at least one fence have mcs_handling_done set to
642
* true before wait for mcs finish. This ensures at
643
* least one CS will be set as completed when polling
644
* mcs fences.
645
*/
646
fence->mcs_handling_done = true;
647
}
648
649
spin_unlock(&mcs_compl->lock);
650
}
651
/* In case CS completed without mcs completion initialized */
652
fence->mcs_handling_done = true;
653
}
654
655
static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
656
struct hl_cs *cs,
657
struct hl_cs_compl *hl_cs_cmpl)
658
{
659
/* Skip this handler if the cs wasn't submitted, to avoid putting
660
* the hw_sob twice, since this case already handled at this point,
661
* also skip if the hw_sob pointer wasn't set.
662
*/
663
if (!hl_cs_cmpl->hw_sob || !cs->submitted)
664
return;
665
666
spin_lock(&hl_cs_cmpl->lock);
667
668
/*
669
* we get refcount upon reservation of signals or signal/wait cs for the
670
* hw_sob object, and need to put it when the first staged cs
671
* (which contains the encaps signals) or cs signal/wait is completed.
672
*/
673
if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
674
(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
675
(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
676
(!!hl_cs_cmpl->encaps_signals)) {
677
dev_dbg(hdev->dev,
678
"CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
679
hl_cs_cmpl->cs_seq,
680
hl_cs_cmpl->type,
681
hl_cs_cmpl->hw_sob->sob_id,
682
hl_cs_cmpl->sob_val);
683
684
hw_sob_put(hl_cs_cmpl->hw_sob);
685
686
if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
687
hdev->asic_funcs->reset_sob_group(hdev,
688
hl_cs_cmpl->sob_group);
689
}
690
691
spin_unlock(&hl_cs_cmpl->lock);
692
}
693
694
static void cs_do_release(struct kref *ref)
695
{
696
struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
697
struct hl_device *hdev = cs->ctx->hdev;
698
struct hl_cs_job *job, *tmp;
699
struct hl_cs_compl *hl_cs_cmpl =
700
container_of(cs->fence, struct hl_cs_compl, base_fence);
701
702
cs->completed = true;
703
704
/*
705
* Although if we reached here it means that all external jobs have
706
* finished, because each one of them took refcnt to CS, we still
707
* need to go over the internal jobs and complete them. Otherwise, we
708
* will have leaked memory and what's worse, the CS object (and
709
* potentially the CTX object) could be released, while the JOB
710
* still holds a pointer to them (but no reference).
711
*/
712
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
713
hl_complete_job(hdev, job);
714
715
if (!cs->submitted) {
716
/*
717
* In case the wait for signal CS was submitted, the fence put
718
* occurs in init_signal_wait_cs() or collective_wait_init_cs()
719
* right before hanging on the PQ.
720
*/
721
if (cs->type == CS_TYPE_WAIT ||
722
cs->type == CS_TYPE_COLLECTIVE_WAIT)
723
hl_fence_put(cs->signal_fence);
724
725
goto out;
726
}
727
728
/* Need to update CI for all queue jobs that does not get completion */
729
hl_hw_queue_update_ci(cs);
730
731
/* remove CS from CS mirror list */
732
spin_lock(&hdev->cs_mirror_lock);
733
list_del_init(&cs->mirror_node);
734
spin_unlock(&hdev->cs_mirror_lock);
735
736
cs_handle_tdr(hdev, cs);
737
738
if (cs->staged_cs) {
739
/* the completion CS decrements reference for the entire
740
* staged submission
741
*/
742
if (cs->staged_last) {
743
struct hl_cs *staged_cs, *tmp_cs;
744
745
list_for_each_entry_safe(staged_cs, tmp_cs,
746
&cs->staged_cs_node, staged_cs_node)
747
staged_cs_put(hdev, staged_cs);
748
}
749
750
/* A staged CS will be a member in the list only after it
751
* was submitted. We used 'cs_mirror_lock' when inserting
752
* it to list so we will use it again when removing it
753
*/
754
if (cs->submitted) {
755
spin_lock(&hdev->cs_mirror_lock);
756
list_del(&cs->staged_cs_node);
757
spin_unlock(&hdev->cs_mirror_lock);
758
}
759
760
/* decrement refcount to handle when first staged cs
761
* with encaps signals is completed.
762
*/
763
if (hl_cs_cmpl->encaps_signals)
764
kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
765
hl_encaps_release_handle_and_put_ctx);
766
}
767
768
if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
769
kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
770
771
out:
772
/* Must be called before hl_ctx_put because inside we use ctx to get
773
* the device
774
*/
775
hl_debugfs_remove_cs(cs);
776
777
hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;
778
779
/* We need to mark an error for not submitted because in that case
780
* the hl fence release flow is different. Mainly, we don't need
781
* to handle hw_sob for signal/wait
782
*/
783
if (cs->timedout)
784
cs->fence->error = -ETIMEDOUT;
785
else if (cs->aborted)
786
cs->fence->error = -EIO;
787
else if (!cs->submitted)
788
cs->fence->error = -EBUSY;
789
790
if (unlikely(cs->skip_reset_on_timeout)) {
791
dev_err(hdev->dev,
792
"Command submission %llu completed after %llu (s)\n",
793
cs->sequence,
794
div_u64(jiffies - cs->submission_time_jiffies, HZ));
795
}
796
797
if (cs->timestamp) {
798
cs->fence->timestamp = cs->completion_timestamp;
799
hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
800
cs->fence->timestamp, cs->fence->error);
801
}
802
803
hl_ctx_put(cs->ctx);
804
805
complete_all(&cs->fence->completion);
806
complete_multi_cs(hdev, cs);
807
808
cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
809
810
hl_fence_put(cs->fence);
811
812
kfree(cs->jobs_in_queue_cnt);
813
kfree(cs);
814
}
815
816
static void cs_timedout(struct work_struct *work)
817
{
818
struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
819
bool skip_reset_on_timeout, device_reset = false;
820
struct hl_device *hdev;
821
u64 event_mask = 0x0;
822
uint timeout_sec;
823
int rc;
824
825
skip_reset_on_timeout = cs->skip_reset_on_timeout;
826
827
rc = cs_get_unless_zero(cs);
828
if (!rc)
829
return;
830
831
if ((!cs->submitted) || (cs->completed)) {
832
cs_put(cs);
833
return;
834
}
835
836
hdev = cs->ctx->hdev;
837
838
if (likely(!skip_reset_on_timeout)) {
839
if (hdev->reset_on_lockup)
840
device_reset = true;
841
else
842
hdev->reset_info.needs_reset = true;
843
844
/* Mark the CS is timed out so we won't try to cancel its TDR */
845
cs->timedout = true;
846
}
847
848
/* Save only the first CS timeout parameters */
849
rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
850
if (rc) {
851
hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
852
hdev->captured_err_info.cs_timeout.seq = cs->sequence;
853
event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
854
}
855
856
timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
857
858
switch (cs->type) {
859
case CS_TYPE_SIGNAL:
860
dev_err(hdev->dev,
861
"Signal command submission %llu has not finished in %u seconds!\n",
862
cs->sequence, timeout_sec);
863
break;
864
865
case CS_TYPE_WAIT:
866
dev_err(hdev->dev,
867
"Wait command submission %llu has not finished in %u seconds!\n",
868
cs->sequence, timeout_sec);
869
break;
870
871
case CS_TYPE_COLLECTIVE_WAIT:
872
dev_err(hdev->dev,
873
"Collective Wait command submission %llu has not finished in %u seconds!\n",
874
cs->sequence, timeout_sec);
875
break;
876
877
default:
878
dev_err(hdev->dev,
879
"Command submission %llu has not finished in %u seconds!\n",
880
cs->sequence, timeout_sec);
881
break;
882
}
883
884
rc = hl_state_dump(hdev);
885
if (rc)
886
dev_err(hdev->dev, "Error during system state dump %d\n", rc);
887
888
cs_put(cs);
889
890
if (device_reset) {
891
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
892
hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
893
} else if (event_mask) {
894
hl_notifier_event_send_all(hdev, event_mask);
895
}
896
}
897
898
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
899
enum hl_cs_type cs_type, u64 user_sequence,
900
struct hl_cs **cs_new, u32 flags, u32 timeout)
901
{
902
struct hl_cs_counters_atomic *cntr;
903
struct hl_fence *other = NULL;
904
struct hl_cs_compl *cs_cmpl;
905
struct hl_cs *cs;
906
int rc;
907
908
cntr = &hdev->aggregated_cs_counters;
909
910
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
911
if (!cs)
912
cs = kzalloc(sizeof(*cs), GFP_KERNEL);
913
914
if (!cs) {
915
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
916
atomic64_inc(&cntr->out_of_mem_drop_cnt);
917
return -ENOMEM;
918
}
919
920
/* increment refcnt for context */
921
hl_ctx_get(ctx);
922
923
cs->ctx = ctx;
924
cs->submitted = false;
925
cs->completed = false;
926
cs->type = cs_type;
927
cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
928
cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
929
cs->timeout_jiffies = timeout;
930
cs->skip_reset_on_timeout =
931
hdev->reset_info.skip_reset_on_timeout ||
932
!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
933
cs->submission_time_jiffies = jiffies;
934
INIT_LIST_HEAD(&cs->job_list);
935
INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
936
kref_init(&cs->refcount);
937
spin_lock_init(&cs->job_lock);
938
939
cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
940
if (!cs_cmpl)
941
cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
942
943
if (!cs_cmpl) {
944
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
945
atomic64_inc(&cntr->out_of_mem_drop_cnt);
946
rc = -ENOMEM;
947
goto free_cs;
948
}
949
950
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
951
sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
952
if (!cs->jobs_in_queue_cnt)
953
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
954
sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
955
956
if (!cs->jobs_in_queue_cnt) {
957
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
958
atomic64_inc(&cntr->out_of_mem_drop_cnt);
959
rc = -ENOMEM;
960
goto free_cs_cmpl;
961
}
962
963
cs_cmpl->hdev = hdev;
964
cs_cmpl->type = cs->type;
965
spin_lock_init(&cs_cmpl->lock);
966
cs->fence = &cs_cmpl->base_fence;
967
968
spin_lock(&ctx->cs_lock);
969
970
cs_cmpl->cs_seq = ctx->cs_sequence;
971
other = ctx->cs_pending[cs_cmpl->cs_seq &
972
(hdev->asic_prop.max_pending_cs - 1)];
973
974
if (other && !completion_done(&other->completion)) {
975
/* If the following statement is true, it means we have reached
976
* a point in which only part of the staged submission was
977
* submitted and we don't have enough room in the 'cs_pending'
978
* array for the rest of the submission.
979
* This causes a deadlock because this CS will never be
980
* completed as it depends on future CS's for completion.
981
*/
982
if (other->cs_sequence == user_sequence)
983
dev_crit_ratelimited(hdev->dev,
984
"Staged CS %llu deadlock due to lack of resources",
985
user_sequence);
986
987
dev_dbg_ratelimited(hdev->dev,
988
"Rejecting CS because of too many in-flights CS\n");
989
atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
990
atomic64_inc(&cntr->max_cs_in_flight_drop_cnt);
991
rc = -EAGAIN;
992
goto free_fence;
993
}
994
995
/* init hl_fence */
996
hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
997
998
cs->sequence = cs_cmpl->cs_seq;
999
1000
ctx->cs_pending[cs_cmpl->cs_seq &
1001
(hdev->asic_prop.max_pending_cs - 1)] =
1002
&cs_cmpl->base_fence;
1003
ctx->cs_sequence++;
1004
1005
hl_fence_get(&cs_cmpl->base_fence);
1006
1007
hl_fence_put(other);
1008
1009
spin_unlock(&ctx->cs_lock);
1010
1011
*cs_new = cs;
1012
1013
return 0;
1014
1015
free_fence:
1016
spin_unlock(&ctx->cs_lock);
1017
kfree(cs->jobs_in_queue_cnt);
1018
free_cs_cmpl:
1019
kfree(cs_cmpl);
1020
free_cs:
1021
kfree(cs);
1022
hl_ctx_put(ctx);
1023
return rc;
1024
}
1025
1026
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
1027
{
1028
struct hl_cs_job *job, *tmp;
1029
1030
staged_cs_put(hdev, cs);
1031
1032
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1033
hl_complete_job(hdev, job);
1034
}
1035
1036
/*
1037
* release_reserved_encaps_signals() - release reserved encapsulated signals.
1038
* @hdev: pointer to habanalabs device structure
1039
*
1040
* Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1041
* encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1042
* For these signals need also to put the refcount of the H/W SOB which was taken at the
1043
* reservation.
1044
*/
1045
static void release_reserved_encaps_signals(struct hl_device *hdev)
1046
{
1047
struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
1048
struct hl_cs_encaps_sig_handle *handle;
1049
struct hl_encaps_signals_mgr *mgr;
1050
u32 id;
1051
1052
if (!ctx)
1053
return;
1054
1055
mgr = &ctx->sig_mgr;
1056
1057
idr_for_each_entry(&mgr->handles, handle, id)
1058
if (handle->cs_seq == ULLONG_MAX)
1059
kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
1060
1061
hl_ctx_put(ctx);
1062
}
1063
1064
void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
1065
{
1066
int i;
1067
struct hl_cs *cs, *tmp;
1068
1069
if (!skip_wq_flush) {
1070
flush_workqueue(hdev->ts_free_obj_wq);
1071
1072
/* flush all completions before iterating over the CS mirror list in
1073
* order to avoid a race with the release functions
1074
*/
1075
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1076
flush_workqueue(hdev->cq_wq[i]);
1077
1078
flush_workqueue(hdev->cs_cmplt_wq);
1079
}
1080
1081
/* Make sure we don't have leftovers in the CS mirror list */
1082
list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
1083
cs_get(cs);
1084
cs->aborted = true;
1085
dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
1086
cs->ctx->asid, cs->sequence);
1087
cs_rollback(hdev, cs);
1088
cs_put(cs);
1089
}
1090
1091
force_complete_multi_cs(hdev);
1092
1093
release_reserved_encaps_signals(hdev);
1094
}
1095
1096
static void
1097
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
1098
{
1099
struct hl_user_pending_interrupt *pend, *temp;
1100
unsigned long flags;
1101
1102
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
1103
list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
1104
pend->fence.error = -EIO;
1105
complete_all(&pend->fence.completion);
1106
}
1107
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
1108
1109
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
1110
list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
1111
list_del(&pend->list_node);
1112
hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
1113
hl_cb_put(pend->ts_reg_info.cq_cb);
1114
}
1115
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
1116
}
1117
1118
void hl_release_pending_user_interrupts(struct hl_device *hdev)
1119
{
1120
struct asic_fixed_properties *prop = &hdev->asic_prop;
1121
struct hl_user_interrupt *interrupt;
1122
int i;
1123
1124
if (!prop->user_interrupt_count)
1125
return;
1126
1127
/* We iterate through the user interrupt requests and waking up all
1128
* user threads waiting for interrupt completion. We iterate the
1129
* list under a lock, this is why all user threads, once awake,
1130
* will wait on the same lock and will release the waiting object upon
1131
* unlock.
1132
*/
1133
1134
for (i = 0 ; i < prop->user_interrupt_count ; i++) {
1135
interrupt = &hdev->user_interrupt[i];
1136
wake_pending_user_interrupt_threads(interrupt);
1137
}
1138
1139
interrupt = &hdev->common_user_cq_interrupt;
1140
wake_pending_user_interrupt_threads(interrupt);
1141
1142
interrupt = &hdev->common_decoder_interrupt;
1143
wake_pending_user_interrupt_threads(interrupt);
1144
}
1145
1146
static void force_complete_cs(struct hl_device *hdev)
1147
{
1148
struct hl_cs *cs;
1149
1150
spin_lock(&hdev->cs_mirror_lock);
1151
1152
list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
1153
cs->fence->error = -EIO;
1154
complete_all(&cs->fence->completion);
1155
}
1156
1157
spin_unlock(&hdev->cs_mirror_lock);
1158
}
1159
1160
void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
1161
{
1162
force_complete_cs(hdev);
1163
force_complete_multi_cs(hdev);
1164
}
1165
1166
static void job_wq_completion(struct work_struct *work)
1167
{
1168
struct hl_cs_job *job = container_of(work, struct hl_cs_job,
1169
finish_work);
1170
struct hl_cs *cs = job->cs;
1171
struct hl_device *hdev = cs->ctx->hdev;
1172
1173
/* job is no longer needed */
1174
hl_complete_job(hdev, job);
1175
}
1176
1177
static void cs_completion(struct work_struct *work)
1178
{
1179
struct hl_cs *cs = container_of(work, struct hl_cs, finish_work);
1180
struct hl_device *hdev = cs->ctx->hdev;
1181
struct hl_cs_job *job, *tmp;
1182
1183
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
1184
hl_complete_job(hdev, job);
1185
}
1186
1187
u32 hl_get_active_cs_num(struct hl_device *hdev)
1188
{
1189
u32 active_cs_num = 0;
1190
struct hl_cs *cs;
1191
1192
spin_lock(&hdev->cs_mirror_lock);
1193
1194
list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
1195
if (!cs->completed)
1196
active_cs_num++;
1197
1198
spin_unlock(&hdev->cs_mirror_lock);
1199
1200
return active_cs_num;
1201
}
1202
1203
static int validate_queue_index(struct hl_device *hdev,
1204
struct hl_cs_chunk *chunk,
1205
enum hl_queue_type *queue_type,
1206
bool *is_kernel_allocated_cb)
1207
{
1208
struct asic_fixed_properties *asic = &hdev->asic_prop;
1209
struct hw_queue_properties *hw_queue_prop;
1210
1211
/* This must be checked here to prevent out-of-bounds access to
1212
* hw_queues_props array
1213
*/
1214
if (chunk->queue_index >= asic->max_queues) {
1215
dev_err(hdev->dev, "Queue index %d is invalid\n",
1216
chunk->queue_index);
1217
return -EINVAL;
1218
}
1219
1220
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
1221
1222
if (hw_queue_prop->type == QUEUE_TYPE_NA) {
1223
dev_err(hdev->dev, "Queue index %d is not applicable\n",
1224
chunk->queue_index);
1225
return -EINVAL;
1226
}
1227
1228
if (hw_queue_prop->binned) {
1229
dev_err(hdev->dev, "Queue index %d is binned out\n",
1230
chunk->queue_index);
1231
return -EINVAL;
1232
}
1233
1234
if (hw_queue_prop->driver_only) {
1235
dev_err(hdev->dev,
1236
"Queue index %d is restricted for the kernel driver\n",
1237
chunk->queue_index);
1238
return -EINVAL;
1239
}
1240
1241
/* When hw queue type isn't QUEUE_TYPE_HW,
1242
* USER_ALLOC_CB flag shall be referred as "don't care".
1243
*/
1244
if (hw_queue_prop->type == QUEUE_TYPE_HW) {
1245
if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) {
1246
if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) {
1247
dev_err(hdev->dev,
1248
"Queue index %d doesn't support user CB\n",
1249
chunk->queue_index);
1250
return -EINVAL;
1251
}
1252
1253
*is_kernel_allocated_cb = false;
1254
} else {
1255
if (!(hw_queue_prop->cb_alloc_flags &
1256
CB_ALLOC_KERNEL)) {
1257
dev_err(hdev->dev,
1258
"Queue index %d doesn't support kernel CB\n",
1259
chunk->queue_index);
1260
return -EINVAL;
1261
}
1262
1263
*is_kernel_allocated_cb = true;
1264
}
1265
} else {
1266
*is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags
1267
& CB_ALLOC_KERNEL);
1268
}
1269
1270
*queue_type = hw_queue_prop->type;
1271
return 0;
1272
}
1273
1274
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1275
struct hl_mem_mgr *mmg,
1276
struct hl_cs_chunk *chunk)
1277
{
1278
struct hl_cb *cb;
1279
1280
cb = hl_cb_get(mmg, chunk->cb_handle);
1281
if (!cb) {
1282
dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
1283
return NULL;
1284
}
1285
1286
if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
1287
dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1288
goto release_cb;
1289
}
1290
1291
atomic_inc(&cb->cs_cnt);
1292
1293
return cb;
1294
1295
release_cb:
1296
hl_cb_put(cb);
1297
return NULL;
1298
}
1299
1300
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1301
enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
1302
{
1303
struct hl_cs_job *job;
1304
1305
job = kzalloc(sizeof(*job), GFP_ATOMIC);
1306
if (!job)
1307
job = kzalloc(sizeof(*job), GFP_KERNEL);
1308
1309
if (!job)
1310
return NULL;
1311
1312
kref_init(&job->refcount);
1313
job->queue_type = queue_type;
1314
job->is_kernel_allocated_cb = is_kernel_allocated_cb;
1315
1316
if (is_cb_patched(hdev, job))
1317
INIT_LIST_HEAD(&job->userptr_list);
1318
1319
if (job->queue_type == QUEUE_TYPE_EXT)
1320
INIT_WORK(&job->finish_work, job_wq_completion);
1321
1322
return job;
1323
}
1324
1325
static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
1326
{
1327
if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
1328
return CS_TYPE_SIGNAL;
1329
else if (cs_type_flags & HL_CS_FLAGS_WAIT)
1330
return CS_TYPE_WAIT;
1331
else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
1332
return CS_TYPE_COLLECTIVE_WAIT;
1333
else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
1334
return CS_RESERVE_SIGNALS;
1335
else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
1336
return CS_UNRESERVE_SIGNALS;
1337
else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
1338
return CS_TYPE_ENGINE_CORE;
1339
else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
1340
return CS_TYPE_ENGINES;
1341
else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
1342
return CS_TYPE_FLUSH_PCI_HBW_WRITES;
1343
else
1344
return CS_TYPE_DEFAULT;
1345
}
1346
1347
static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
1348
{
1349
struct hl_device *hdev = hpriv->hdev;
1350
struct hl_ctx *ctx = hpriv->ctx;
1351
u32 cs_type_flags, num_chunks;
1352
enum hl_device_status status;
1353
enum hl_cs_type cs_type;
1354
bool is_sync_stream;
1355
int i;
1356
1357
for (i = 0 ; i < sizeof(args->in.pad) ; i++)
1358
if (args->in.pad[i]) {
1359
dev_dbg(hdev->dev, "Padding bytes must be 0\n");
1360
return -EINVAL;
1361
}
1362
1363
if (!hl_device_operational(hdev, &status))
1364
return -EBUSY;
1365
1366
if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1367
!hdev->supports_staged_submission) {
1368
dev_err(hdev->dev, "staged submission not supported");
1369
return -EPERM;
1370
}
1371
1372
cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
1373
1374
if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
1375
dev_err(hdev->dev,
1376
"CS type flags are mutually exclusive, context %d\n",
1377
ctx->asid);
1378
return -EINVAL;
1379
}
1380
1381
cs_type = hl_cs_get_cs_type(cs_type_flags);
1382
num_chunks = args->in.num_chunks_execute;
1383
1384
is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
1385
cs_type == CS_TYPE_COLLECTIVE_WAIT);
1386
1387
if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
1388
dev_err(hdev->dev, "Sync stream CS is not supported\n");
1389
return -EINVAL;
1390
}
1391
1392
if (cs_type == CS_TYPE_DEFAULT) {
1393
if (!num_chunks) {
1394
dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
1395
return -EINVAL;
1396
}
1397
} else if (is_sync_stream && num_chunks != 1) {
1398
dev_err(hdev->dev,
1399
"Sync stream CS mandates one chunk only, context %d\n",
1400
ctx->asid);
1401
return -EINVAL;
1402
}
1403
1404
return 0;
1405
}
1406
1407
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1408
struct hl_cs_chunk **cs_chunk_array,
1409
void __user *chunks, u32 num_chunks,
1410
struct hl_ctx *ctx)
1411
{
1412
u32 size_to_copy;
1413
1414
if (num_chunks > HL_MAX_JOBS_PER_CS) {
1415
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1416
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1417
dev_err(hdev->dev,
1418
"Number of chunks can NOT be larger than %d\n",
1419
HL_MAX_JOBS_PER_CS);
1420
return -EINVAL;
1421
}
1422
1423
*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
1424
GFP_ATOMIC);
1425
if (!*cs_chunk_array)
1426
*cs_chunk_array = kmalloc_array(num_chunks,
1427
sizeof(**cs_chunk_array), GFP_KERNEL);
1428
if (!*cs_chunk_array) {
1429
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1430
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1431
return -ENOMEM;
1432
}
1433
1434
size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
1435
if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
1436
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1437
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1438
dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1439
kfree(*cs_chunk_array);
1440
return -EFAULT;
1441
}
1442
1443
return 0;
1444
}
1445
1446
static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1447
u64 sequence, u32 flags,
1448
u32 encaps_signal_handle)
1449
{
1450
if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
1451
return 0;
1452
1453
cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
1454
cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
1455
1456
if (cs->staged_first) {
1457
/* Staged CS sequence is the first CS sequence */
1458
INIT_LIST_HEAD(&cs->staged_cs_node);
1459
cs->staged_sequence = cs->sequence;
1460
1461
if (cs->encaps_signals)
1462
cs->encaps_sig_hdl_id = encaps_signal_handle;
1463
} else {
1464
/* User sequence will be validated in 'hl_hw_queue_schedule_cs'
1465
* under the cs_mirror_lock
1466
*/
1467
cs->staged_sequence = sequence;
1468
}
1469
1470
/* Increment CS reference if needed */
1471
staged_cs_get(hdev, cs);
1472
1473
cs->staged_cs = true;
1474
1475
return 0;
1476
}
1477
1478
static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1479
{
1480
int i;
1481
1482
for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1483
if (qid == hdev->stream_master_qid_arr[i])
1484
return BIT(i);
1485
1486
return 0;
1487
}
1488
1489
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
1490
u32 num_chunks, u64 *cs_seq, u32 flags,
1491
u32 encaps_signals_handle, u32 timeout,
1492
u16 *signal_initial_sob_count)
1493
{
1494
bool staged_mid, int_queues_only = true, using_hw_queues = false;
1495
struct hl_device *hdev = hpriv->hdev;
1496
struct hl_cs_chunk *cs_chunk_array;
1497
struct hl_cs_counters_atomic *cntr;
1498
struct hl_ctx *ctx = hpriv->ctx;
1499
struct hl_cs_job *job;
1500
struct hl_cs *cs;
1501
struct hl_cb *cb;
1502
u64 user_sequence;
1503
u8 stream_master_qid_map = 0;
1504
int rc, i;
1505
1506
cntr = &hdev->aggregated_cs_counters;
1507
user_sequence = *cs_seq;
1508
*cs_seq = ULLONG_MAX;
1509
1510
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1511
hpriv->ctx);
1512
if (rc)
1513
goto out;
1514
1515
if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
1516
!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
1517
staged_mid = true;
1518
else
1519
staged_mid = false;
1520
1521
rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1522
staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
1523
timeout);
1524
if (rc)
1525
goto free_cs_chunk_array;
1526
1527
*cs_seq = cs->sequence;
1528
1529
hl_debugfs_add_cs(cs);
1530
1531
rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1532
encaps_signals_handle);
1533
if (rc)
1534
goto free_cs_object;
1535
1536
/* If this is a staged submission we must return the staged sequence
1537
* rather than the internal CS sequence
1538
*/
1539
if (cs->staged_cs)
1540
*cs_seq = cs->staged_sequence;
1541
1542
/* Validate ALL the CS chunks before submitting the CS */
1543
for (i = 0 ; i < num_chunks ; i++) {
1544
struct hl_cs_chunk *chunk = &cs_chunk_array[i];
1545
enum hl_queue_type queue_type;
1546
bool is_kernel_allocated_cb;
1547
1548
rc = validate_queue_index(hdev, chunk, &queue_type,
1549
&is_kernel_allocated_cb);
1550
if (rc) {
1551
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1552
atomic64_inc(&cntr->validation_drop_cnt);
1553
goto free_cs_object;
1554
}
1555
1556
if (is_kernel_allocated_cb) {
1557
cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
1558
if (!cb) {
1559
atomic64_inc(
1560
&ctx->cs_counters.validation_drop_cnt);
1561
atomic64_inc(&cntr->validation_drop_cnt);
1562
rc = -EINVAL;
1563
goto free_cs_object;
1564
}
1565
} else {
1566
cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
1567
}
1568
1569
if (queue_type == QUEUE_TYPE_EXT ||
1570
queue_type == QUEUE_TYPE_HW) {
1571
int_queues_only = false;
1572
1573
/*
1574
* store which stream are being used for external/HW
1575
* queues of this CS
1576
*/
1577
if (hdev->supports_wait_for_multi_cs)
1578
stream_master_qid_map |=
1579
get_stream_master_qid_mask(hdev,
1580
chunk->queue_index);
1581
}
1582
1583
if (queue_type == QUEUE_TYPE_HW)
1584
using_hw_queues = true;
1585
1586
job = hl_cs_allocate_job(hdev, queue_type,
1587
is_kernel_allocated_cb);
1588
if (!job) {
1589
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1590
atomic64_inc(&cntr->out_of_mem_drop_cnt);
1591
dev_err(hdev->dev, "Failed to allocate a new job\n");
1592
rc = -ENOMEM;
1593
if (is_kernel_allocated_cb)
1594
goto release_cb;
1595
1596
goto free_cs_object;
1597
}
1598
1599
job->id = i + 1;
1600
job->cs = cs;
1601
job->user_cb = cb;
1602
job->user_cb_size = chunk->cb_size;
1603
job->hw_queue_id = chunk->queue_index;
1604
1605
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1606
cs->jobs_cnt++;
1607
1608
list_add_tail(&job->cs_node, &cs->job_list);
1609
1610
/*
1611
* Increment CS reference. When CS reference is 0, CS is
1612
* done and can be signaled to user and free all its resources
1613
* Only increment for JOB on external or H/W queues, because
1614
* only for those JOBs we get completion
1615
*/
1616
if (cs_needs_completion(cs) &&
1617
(job->queue_type == QUEUE_TYPE_EXT ||
1618
job->queue_type == QUEUE_TYPE_HW))
1619
cs_get(cs);
1620
1621
hl_debugfs_add_job(hdev, job);
1622
1623
rc = cs_parser(hpriv, job);
1624
if (rc) {
1625
atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
1626
atomic64_inc(&cntr->parsing_drop_cnt);
1627
dev_err(hdev->dev,
1628
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
1629
cs->ctx->asid, cs->sequence, job->id, rc);
1630
goto free_cs_object;
1631
}
1632
}
1633
1634
/* We allow a CS with any queue type combination as long as it does
1635
* not get a completion
1636
*/
1637
if (int_queues_only && cs_needs_completion(cs)) {
1638
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1639
atomic64_inc(&cntr->validation_drop_cnt);
1640
dev_err(hdev->dev,
1641
"Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n",
1642
cs->ctx->asid, cs->sequence);
1643
rc = -EINVAL;
1644
goto free_cs_object;
1645
}
1646
1647
if (using_hw_queues)
1648
INIT_WORK(&cs->finish_work, cs_completion);
1649
1650
/*
1651
* store the (external/HW queues) streams used by the CS in the
1652
* fence object for multi-CS completion
1653
*/
1654
if (hdev->supports_wait_for_multi_cs)
1655
cs->fence->stream_master_qid_map = stream_master_qid_map;
1656
1657
rc = hl_hw_queue_schedule_cs(cs);
1658
if (rc) {
1659
if (rc != -EAGAIN)
1660
dev_err(hdev->dev,
1661
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
1662
cs->ctx->asid, cs->sequence, rc);
1663
goto free_cs_object;
1664
}
1665
1666
*signal_initial_sob_count = cs->initial_sob_count;
1667
1668
rc = HL_CS_STATUS_SUCCESS;
1669
goto put_cs;
1670
1671
release_cb:
1672
atomic_dec(&cb->cs_cnt);
1673
hl_cb_put(cb);
1674
free_cs_object:
1675
cs_rollback(hdev, cs);
1676
*cs_seq = ULLONG_MAX;
1677
/* The path below is both for good and erroneous exits */
1678
put_cs:
1679
/* We finished with the CS in this function, so put the ref */
1680
cs_put(cs);
1681
free_cs_chunk_array:
1682
kfree(cs_chunk_array);
1683
out:
1684
return rc;
1685
}
1686
1687
static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
1688
u64 *cs_seq)
1689
{
1690
struct hl_device *hdev = hpriv->hdev;
1691
struct hl_ctx *ctx = hpriv->ctx;
1692
bool need_soft_reset = false;
1693
int rc = 0, do_ctx_switch = 0;
1694
void __user *chunks;
1695
u32 num_chunks, tmp;
1696
u16 sob_count;
1697
int ret;
1698
1699
if (hdev->supports_ctx_switch)
1700
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
1701
1702
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
1703
mutex_lock(&hpriv->restore_phase_mutex);
1704
1705
if (do_ctx_switch) {
1706
rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1707
if (rc) {
1708
dev_err_ratelimited(hdev->dev,
1709
"Failed to switch to context %d, rejecting CS! %d\n",
1710
ctx->asid, rc);
1711
/*
1712
* If we timedout, or if the device is not IDLE
1713
* while we want to do context-switch (-EBUSY),
1714
* we need to soft-reset because QMAN is
1715
* probably stuck. However, we can't call to
1716
* reset here directly because of deadlock, so
1717
* need to do it at the very end of this
1718
* function
1719
*/
1720
if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
1721
need_soft_reset = true;
1722
mutex_unlock(&hpriv->restore_phase_mutex);
1723
goto out;
1724
}
1725
}
1726
1727
hdev->asic_funcs->restore_phase_topology(hdev);
1728
1729
chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
1730
num_chunks = args->in.num_chunks_restore;
1731
1732
if (!num_chunks) {
1733
dev_dbg(hdev->dev,
1734
"Need to run restore phase but restore CS is empty\n");
1735
rc = 0;
1736
} else {
1737
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
1738
cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1739
}
1740
1741
mutex_unlock(&hpriv->restore_phase_mutex);
1742
1743
if (rc) {
1744
dev_err(hdev->dev,
1745
"Failed to submit restore CS for context %d (%d)\n",
1746
ctx->asid, rc);
1747
goto out;
1748
}
1749
1750
/* Need to wait for restore completion before execution phase */
1751
if (num_chunks) {
1752
enum hl_cs_wait_status status;
1753
1754
ret = _hl_cs_wait_ioctl(hdev, ctx,
1755
jiffies_to_usecs(hdev->timeout_jiffies),
1756
*cs_seq, &status, NULL);
1757
if (ret) {
1758
dev_err(hdev->dev,
1759
"Restore CS for context %d failed to complete %d\n",
1760
ctx->asid, ret);
1761
rc = -ENOEXEC;
1762
goto out;
1763
}
1764
}
1765
1766
if (hdev->supports_ctx_switch)
1767
ctx->thread_ctx_switch_wait_token = 1;
1768
1769
} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
1770
rc = hl_poll_timeout_memory(hdev,
1771
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
1772
100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1773
1774
if (rc == -ETIMEDOUT) {
1775
dev_err(hdev->dev,
1776
"context switch phase timeout (%d)\n", tmp);
1777
goto out;
1778
}
1779
}
1780
1781
out:
1782
if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
1783
hl_device_reset(hdev, 0);
1784
1785
return rc;
1786
}
1787
1788
/*
1789
* hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case.
1790
* if the SOB value reaches the max value move to the other SOB reserved
1791
* to the queue.
1792
* @hdev: pointer to device structure
1793
* @q_idx: stream queue index
1794
* @hw_sob: the H/W SOB used in this signal CS.
1795
* @count: signals count
1796
* @encaps_sig: tells whether it's reservation for encaps signals or not.
1797
*
1798
* Note that this function must be called while hw_queues_lock is taken.
1799
*/
1800
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1801
struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
1802
1803
{
1804
struct hl_sync_stream_properties *prop;
1805
struct hl_hw_sob *sob = *hw_sob, *other_sob;
1806
u8 other_sob_offset;
1807
1808
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1809
1810
hw_sob_get(sob);
1811
1812
/* check for wraparound */
1813
if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
1814
/*
1815
* Decrement as we reached the max value.
1816
* The release function won't be called here as we've
1817
* just incremented the refcount right before calling this
1818
* function.
1819
*/
1820
hw_sob_put_err(sob);
1821
1822
/*
1823
* check the other sob value, if it still in use then fail
1824
* otherwise make the switch
1825
*/
1826
other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
1827
other_sob = &prop->hw_sob[other_sob_offset];
1828
1829
if (kref_read(&other_sob->kref) != 1) {
1830
dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1831
q_idx);
1832
return -EINVAL;
1833
}
1834
1835
/*
1836
* next_sob_val always points to the next available signal
1837
* in the sob, so in encaps signals it will be the next one
1838
* after reserving the required amount.
1839
*/
1840
if (encaps_sig)
1841
prop->next_sob_val = count + 1;
1842
else
1843
prop->next_sob_val = count;
1844
1845
/* only two SOBs are currently in use */
1846
prop->curr_sob_offset = other_sob_offset;
1847
*hw_sob = other_sob;
1848
1849
/*
1850
* check if other_sob needs reset, then do it before using it
1851
* for the reservation or the next signal cs.
1852
* we do it here, and for both encaps and regular signal cs
1853
* cases in order to avoid possible races of two kref_put
1854
* of the sob which can occur at the same time if we move the
1855
* sob reset(kref_put) to cs_do_release function.
1856
* in addition, if we have combination of cs signal and
1857
* encaps, and at the point we need to reset the sob there was
1858
* no more reservations and only signal cs keep coming,
1859
* in such case we need signal_cs to put the refcount and
1860
* reset the sob.
1861
*/
1862
if (other_sob->need_reset)
1863
hw_sob_put(other_sob);
1864
1865
if (encaps_sig) {
1866
/* set reset indication for the sob */
1867
sob->need_reset = true;
1868
hw_sob_get(other_sob);
1869
}
1870
1871
dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1872
prop->curr_sob_offset, q_idx);
1873
} else {
1874
prop->next_sob_val += count;
1875
}
1876
1877
return 0;
1878
}
1879
1880
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1881
struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
1882
bool encaps_signals)
1883
{
1884
u64 *signal_seq_arr = NULL;
1885
u32 size_to_copy, signal_seq_arr_len;
1886
int rc = 0;
1887
1888
if (encaps_signals) {
1889
*signal_seq = chunk->encaps_signal_seq;
1890
return 0;
1891
}
1892
1893
signal_seq_arr_len = chunk->num_signal_seq_arr;
1894
1895
/* currently only one signal seq is supported */
1896
if (signal_seq_arr_len != 1) {
1897
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1898
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1899
dev_err(hdev->dev,
1900
"Wait for signal CS supports only one signal CS seq\n");
1901
return -EINVAL;
1902
}
1903
1904
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1905
sizeof(*signal_seq_arr),
1906
GFP_ATOMIC);
1907
if (!signal_seq_arr)
1908
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
1909
sizeof(*signal_seq_arr),
1910
GFP_KERNEL);
1911
if (!signal_seq_arr) {
1912
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1913
atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1914
return -ENOMEM;
1915
}
1916
1917
size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
1918
if (copy_from_user(signal_seq_arr,
1919
u64_to_user_ptr(chunk->signal_seq_arr),
1920
size_to_copy)) {
1921
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
1922
atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1923
dev_err(hdev->dev,
1924
"Failed to copy signal seq array from user\n");
1925
rc = -EFAULT;
1926
goto out;
1927
}
1928
1929
/* currently it is guaranteed to have only one signal seq */
1930
*signal_seq = signal_seq_arr[0];
1931
1932
out:
1933
kfree(signal_seq_arr);
1934
1935
return rc;
1936
}
1937
1938
static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1939
struct hl_ctx *ctx, struct hl_cs *cs,
1940
enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
1941
{
1942
struct hl_cs_counters_atomic *cntr;
1943
struct hl_cs_job *job;
1944
struct hl_cb *cb;
1945
u32 cb_size;
1946
1947
cntr = &hdev->aggregated_cs_counters;
1948
1949
job = hl_cs_allocate_job(hdev, q_type, true);
1950
if (!job) {
1951
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1952
atomic64_inc(&cntr->out_of_mem_drop_cnt);
1953
dev_err(hdev->dev, "Failed to allocate a new job\n");
1954
return -ENOMEM;
1955
}
1956
1957
if (cs->type == CS_TYPE_WAIT)
1958
cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1959
else
1960
cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1961
1962
cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
1963
if (!cb) {
1964
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1965
atomic64_inc(&cntr->out_of_mem_drop_cnt);
1966
kfree(job);
1967
return -EFAULT;
1968
}
1969
1970
job->id = 0;
1971
job->cs = cs;
1972
job->user_cb = cb;
1973
atomic_inc(&job->user_cb->cs_cnt);
1974
job->user_cb_size = cb_size;
1975
job->hw_queue_id = q_idx;
1976
1977
if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
1978
&& cs->encaps_signals)
1979
job->encaps_sig_wait_offset = encaps_signal_offset;
1980
/*
 * No need for parsing, the user CB is already the patched CB.
 * We call hl_cb_destroy() for two reasons: we don't need the CB in
 * the CB idr anymore, and we must decrement its refcount as it was
 * incremented inside hl_cb_kernel_create().
 */
1986
job->patched_cb = job->user_cb;
1987
job->job_cb_size = job->user_cb_size;
1988
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1989
1990
/* increment refcount as for external queues we get completion */
1991
cs_get(cs);
1992
1993
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1994
cs->jobs_cnt++;
1995
1996
list_add_tail(&job->cs_node, &cs->job_list);
1997
1998
hl_debugfs_add_job(hdev, job);
1999
2000
return 0;
2001
}
2002
2003
static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
2004
u32 q_idx, u32 count,
2005
u32 *handle_id, u32 *sob_addr,
2006
u32 *signals_count)
2007
{
2008
struct hw_queue_properties *hw_queue_prop;
2009
struct hl_sync_stream_properties *prop;
2010
struct hl_device *hdev = hpriv->hdev;
2011
struct hl_cs_encaps_sig_handle *handle;
2012
struct hl_encaps_signals_mgr *mgr;
2013
struct hl_hw_sob *hw_sob;
2014
int hdl_id;
2015
int rc = 0;
2016
2017
if (count >= HL_MAX_SOB_VAL) {
2018
dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
2019
count);
2020
rc = -EINVAL;
2021
goto out;
2022
}
2023
2024
if (q_idx >= hdev->asic_prop.max_queues) {
2025
dev_err(hdev->dev, "Queue index %d is invalid\n",
2026
q_idx);
2027
rc = -EINVAL;
2028
goto out;
2029
}
2030
2031
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2032
2033
if (!hw_queue_prop->supports_sync_stream) {
2034
dev_err(hdev->dev,
2035
"Queue index %d does not support sync stream operations\n",
2036
q_idx);
2037
rc = -EINVAL;
2038
goto out;
2039
}
2040
2041
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2042
2043
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
2044
if (!handle) {
2045
rc = -ENOMEM;
2046
goto out;
2047
}
2048
2049
handle->count = count;
2050
2051
hl_ctx_get(hpriv->ctx);
2052
handle->ctx = hpriv->ctx;
2053
mgr = &hpriv->ctx->sig_mgr;
2054
2055
spin_lock(&mgr->lock);
2056
hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
2057
spin_unlock(&mgr->lock);
2058
2059
if (hdl_id < 0) {
2060
dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
2061
rc = -EINVAL;
2062
goto put_ctx;
2063
}
2064
2065
handle->id = hdl_id;
2066
handle->q_idx = q_idx;
2067
handle->hdev = hdev;
2068
kref_init(&handle->refcount);
2069
2070
hdev->asic_funcs->hw_queues_lock(hdev);
2071
2072
hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2073
2074
/*
 * Increment the SOB value by the user-requested count in order
 * to reserve those signals.
 * Check that the amount of signals to reserve does not exceed the max
 * SOB value; if it does, switch to the other SOB.
 */
2080
rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
2081
true);
2082
if (rc) {
2083
dev_err(hdev->dev, "Failed to switch SOB\n");
2084
hdev->asic_funcs->hw_queues_unlock(hdev);
2085
rc = -EINVAL;
2086
goto remove_idr;
2087
}
2088
/* Set the hw_sob in the handle only after calling the SOB wraparound
 * handler, since the SOB could have changed.
 */
2091
handle->hw_sob = hw_sob;
2092
2093
/* store the current sob value for unreserve validity check, and
2094
* signal offset support
2095
*/
2096
handle->pre_sob_val = prop->next_sob_val - handle->count;
2097
2098
handle->cs_seq = ULLONG_MAX;
2099
2100
*signals_count = prop->next_sob_val;
2101
hdev->asic_funcs->hw_queues_unlock(hdev);
2102
2103
*sob_addr = handle->hw_sob->sob_addr;
2104
*handle_id = hdl_id;
2105
2106
dev_dbg(hdev->dev,
2107
"Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
2108
hw_sob->sob_id, handle->hw_sob->sob_addr,
2109
prop->next_sob_val - 1, q_idx, hdl_id);
2110
goto out;
2111
2112
remove_idr:
2113
spin_lock(&mgr->lock);
2114
idr_remove(&mgr->handles, hdl_id);
2115
spin_unlock(&mgr->lock);
2116
2117
put_ctx:
2118
hl_ctx_put(handle->ctx);
2119
kfree(handle);
2120
2121
out:
2122
return rc;
2123
}
2124
2125
static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
2126
{
2127
struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
2128
struct hl_sync_stream_properties *prop;
2129
struct hl_device *hdev = hpriv->hdev;
2130
struct hl_encaps_signals_mgr *mgr;
2131
struct hl_hw_sob *hw_sob;
2132
u32 q_idx, sob_addr;
2133
int rc = 0;
2134
2135
mgr = &hpriv->ctx->sig_mgr;
2136
2137
spin_lock(&mgr->lock);
2138
encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
2139
if (encaps_sig_hdl) {
2140
dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
2141
handle_id, encaps_sig_hdl->hw_sob->sob_addr,
2142
encaps_sig_hdl->count);
2143
2144
hdev->asic_funcs->hw_queues_lock(hdev);
2145
2146
q_idx = encaps_sig_hdl->q_idx;
2147
prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2148
hw_sob = &prop->hw_sob[prop->curr_sob_offset];
2149
sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
2150
2151
/* Check if sob_val got out of sync due to other
 * signal submission requests which were handled
 * between the reserve-unreserve calls, or due to a SOB switch
 * upon reaching the SOB max value.
 */
2156
if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
2157
!= prop->next_sob_val ||
2158
sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
2159
dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
2160
encaps_sig_hdl->pre_sob_val,
2161
(prop->next_sob_val - encaps_sig_hdl->count));
2162
2163
hdev->asic_funcs->hw_queues_unlock(hdev);
2164
rc = -EINVAL;
2165
goto out_unlock;
2166
}
2167
2168
/*
2169
* Decrement the SOB value by count by user request
2170
* to unreserve those signals
2171
*/
2172
prop->next_sob_val -= encaps_sig_hdl->count;
2173
2174
hdev->asic_funcs->hw_queues_unlock(hdev);
2175
2176
hw_sob_put(hw_sob);
2177
2178
/* Release the id and free allocated memory of the handle */
2179
idr_remove(&mgr->handles, handle_id);
2180
2181
/* unlock before calling ctx_put, where we might sleep */
2182
spin_unlock(&mgr->lock);
2183
hl_ctx_put(encaps_sig_hdl->ctx);
2184
kfree(encaps_sig_hdl);
2185
goto out;
2186
} else {
2187
rc = -EINVAL;
2188
dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
2189
}
2190
2191
out_unlock:
2192
spin_unlock(&mgr->lock);
2193
2194
out:
2195
return rc;
2196
}
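/*
 * Illustrative flow sketch (comment only, simplified and not a complete
 * user-space example) of how the reserve/unreserve pair above is typically
 * driven through hl_cs_ioctl() further below. The field names are the ones
 * used in this file; the exact HL_CS_FLAGS_* bits are resolved by
 * hl_cs_get_cs_type().
 *
 *   1. Reserve: submit a CS ioctl whose flags map to CS_RESERVE_SIGNALS,
 *      with in.encaps_signals_q_idx and in.encaps_signals_count set.
 *      On success, out.handle_id, out.sob_base_addr_offset and out.count
 *      describe the reservation.
 *   2. Submit work that signals within the reserved range, referencing the
 *      returned handle (in.encaps_sig_handle_id) and a signal offset.
 *   3. Unreserve: if the reservation was not consumed, submit a CS ioctl
 *      whose flags map to CS_UNRESERVE_SIGNALS with in.encaps_sig_handle_id
 *      set to the handle from step 1. This only succeeds while next_sob_val
 *      still matches the reservation.
 */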
2197
2198
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
2199
void __user *chunks, u32 num_chunks,
2200
u64 *cs_seq, u32 flags, u32 timeout,
2201
u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
2202
{
2203
struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
2204
bool handle_found = false, is_wait_cs = false,
2205
wait_cs_submitted = false,
2206
cs_encaps_signals = false;
2207
struct hl_cs_chunk *cs_chunk_array, *chunk;
2208
bool staged_cs_with_encaps_signals = false;
2209
struct hw_queue_properties *hw_queue_prop;
2210
struct hl_device *hdev = hpriv->hdev;
2211
struct hl_cs_compl *sig_waitcs_cmpl;
2212
u32 q_idx, collective_engine_id = 0;
2213
struct hl_cs_counters_atomic *cntr;
2214
struct hl_fence *sig_fence = NULL;
2215
struct hl_ctx *ctx = hpriv->ctx;
2216
enum hl_queue_type q_type;
2217
struct hl_cs *cs;
2218
u64 signal_seq;
2219
int rc;
2220
2221
cntr = &hdev->aggregated_cs_counters;
2222
*cs_seq = ULLONG_MAX;
2223
2224
rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
2225
ctx);
2226
if (rc)
2227
goto out;
2228
2229
/* currently it is guaranteed to have only one chunk */
2230
chunk = &cs_chunk_array[0];
2231
2232
if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2233
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2234
atomic64_inc(&cntr->validation_drop_cnt);
2235
dev_err(hdev->dev, "Queue index %d is invalid\n",
2236
chunk->queue_index);
2237
rc = -EINVAL;
2238
goto free_cs_chunk_array;
2239
}
2240
2241
q_idx = chunk->queue_index;
2242
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2243
q_type = hw_queue_prop->type;
2244
2245
if (!hw_queue_prop->supports_sync_stream) {
2246
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2247
atomic64_inc(&cntr->validation_drop_cnt);
2248
dev_err(hdev->dev,
2249
"Queue index %d does not support sync stream operations\n",
2250
q_idx);
2251
rc = -EINVAL;
2252
goto free_cs_chunk_array;
2253
}
2254
2255
if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
2256
if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
2257
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2258
atomic64_inc(&cntr->validation_drop_cnt);
2259
dev_err(hdev->dev,
2260
"Queue index %d is invalid\n", q_idx);
2261
rc = -EINVAL;
2262
goto free_cs_chunk_array;
2263
}
2264
2265
if (!hdev->nic_ports_mask) {
2266
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2267
atomic64_inc(&cntr->validation_drop_cnt);
2268
dev_err(hdev->dev,
2269
"Collective operations not supported when NIC ports are disabled");
2270
rc = -EINVAL;
2271
goto free_cs_chunk_array;
2272
}
2273
2274
collective_engine_id = chunk->collective_engine_id;
2275
}
2276
2277
is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
2278
cs_type == CS_TYPE_COLLECTIVE_WAIT);
2279
2280
cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
2281
2282
if (is_wait_cs) {
2283
rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2284
ctx, cs_encaps_signals);
2285
if (rc)
2286
goto free_cs_chunk_array;
2287
2288
if (cs_encaps_signals) {
2289
/* check if cs sequence has encapsulated
2290
* signals handle
2291
*/
2292
struct idr *idp;
2293
u32 id;
2294
2295
spin_lock(&ctx->sig_mgr.lock);
2296
idp = &ctx->sig_mgr.handles;
2297
idr_for_each_entry(idp, encaps_sig_hdl, id) {
2298
if (encaps_sig_hdl->cs_seq == signal_seq) {
2299
/* Get a refcount to protect removing this handle from the idr;
 * this is needed when multiple wait CSs are used with an offset
 * to wait on reserved encaps signals.
 * Since kref_put of this handle is executed outside the
 * current lock, it is possible that the handle refcount
 * is 0 but it has not yet been removed from the list. In this
 * case we need to consider the handle as not valid.
 */
2307
if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
2308
handle_found = true;
2309
break;
2310
}
2311
}
2312
spin_unlock(&ctx->sig_mgr.lock);
2313
2314
if (!handle_found) {
2315
/* treat as signal CS already finished */
2316
dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2317
signal_seq);
2318
rc = 0;
2319
goto free_cs_chunk_array;
2320
}
2321
2322
/* validate also the signal offset value */
2323
if (chunk->encaps_signal_offset >
2324
encaps_sig_hdl->count) {
2325
dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
2326
chunk->encaps_signal_offset,
2327
encaps_sig_hdl->count);
2328
rc = -EINVAL;
2329
goto free_cs_chunk_array;
2330
}
2331
}
2332
2333
sig_fence = hl_ctx_get_fence(ctx, signal_seq);
2334
if (IS_ERR(sig_fence)) {
2335
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2336
atomic64_inc(&cntr->validation_drop_cnt);
2337
dev_err(hdev->dev,
2338
"Failed to get signal CS with seq 0x%llx\n",
2339
signal_seq);
2340
rc = PTR_ERR(sig_fence);
2341
goto free_cs_chunk_array;
2342
}
2343
2344
if (!sig_fence) {
2345
/* signal CS already finished */
2346
rc = 0;
2347
goto free_cs_chunk_array;
2348
}
2349
2350
sig_waitcs_cmpl =
2351
container_of(sig_fence, struct hl_cs_compl, base_fence);
2352
2353
staged_cs_with_encaps_signals = !!
2354
(sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
2355
(flags & HL_CS_FLAGS_ENCAP_SIGNALS));
2356
2357
if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
2358
!staged_cs_with_encaps_signals) {
2359
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2360
atomic64_inc(&cntr->validation_drop_cnt);
2361
dev_err(hdev->dev,
2362
"CS seq 0x%llx is not of a signal/encaps-signal CS\n",
2363
signal_seq);
2364
hl_fence_put(sig_fence);
2365
rc = -EINVAL;
2366
goto free_cs_chunk_array;
2367
}
2368
2369
if (completion_done(&sig_fence->completion)) {
2370
/* signal CS already finished */
2371
hl_fence_put(sig_fence);
2372
rc = 0;
2373
goto free_cs_chunk_array;
2374
}
2375
}
2376
2377
rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2378
if (rc) {
2379
if (is_wait_cs)
2380
hl_fence_put(sig_fence);
2381
2382
goto free_cs_chunk_array;
2383
}
2384
2385
/*
 * Save the signal CS fence for later initialization right before
 * hanging the wait CS on the queue.
 * For the encaps signals case, we save the CS sequence and handle
 * pointer for later initialization.
 */
if (is_wait_cs) {
cs->signal_fence = sig_fence;
/* Store the handle pointer, so we don't have to
 * look for it again later in the flow,
 * when we need to set the SOB info in hw_queue.
 */
2397
if (cs->encaps_signals)
2398
cs->encaps_sig_hdl = encaps_sig_hdl;
2399
}
2400
2401
hl_debugfs_add_cs(cs);
2402
2403
*cs_seq = cs->sequence;
2404
2405
if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
2406
rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2407
q_idx, chunk->encaps_signal_offset);
2408
else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
2409
rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2410
cs, q_idx, collective_engine_id,
2411
chunk->encaps_signal_offset);
2412
else {
2413
atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
2414
atomic64_inc(&cntr->validation_drop_cnt);
2415
rc = -EINVAL;
2416
}
2417
2418
if (rc)
2419
goto free_cs_object;
2420
2421
if (q_type == QUEUE_TYPE_HW)
2422
INIT_WORK(&cs->finish_work, cs_completion);
2423
2424
rc = hl_hw_queue_schedule_cs(cs);
2425
if (rc) {
2426
/* In case the wait CS failed here, it means the signal CS
 * already completed. We want to free all of its related objects,
 * but we don't want to fail the ioctl.
 */
2430
if (is_wait_cs)
2431
rc = 0;
2432
else if (rc != -EAGAIN)
2433
dev_err(hdev->dev,
2434
"Failed to submit CS %d.%llu to H/W queues, error %d\n",
2435
ctx->asid, cs->sequence, rc);
2436
goto free_cs_object;
2437
}
2438
2439
*signal_sob_addr_offset = cs->sob_addr_offset;
2440
*signal_initial_sob_count = cs->initial_sob_count;
2441
2442
rc = HL_CS_STATUS_SUCCESS;
2443
if (is_wait_cs)
2444
wait_cs_submitted = true;
2445
goto put_cs;
2446
2447
free_cs_object:
2448
cs_rollback(hdev, cs);
2449
*cs_seq = ULLONG_MAX;
2450
/* The path below is both for good and erroneous exits */
2451
put_cs:
2452
/* We finished with the CS in this function, so put the ref */
2453
cs_put(cs);
2454
free_cs_chunk_array:
2455
if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
2456
kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
2457
kfree(cs_chunk_array);
2458
out:
2459
return rc;
2460
}
2461
2462
static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
2463
u32 num_engine_cores, u32 core_command)
2464
{
2465
struct hl_device *hdev = hpriv->hdev;
2466
void __user *engine_cores_arr;
2467
u32 *cores;
2468
int rc;
2469
2470
if (!hdev->asic_prop.supports_engine_modes)
2471
return -EPERM;
2472
2473
if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
2474
dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
2475
return -EINVAL;
2476
}
2477
2478
if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
2479
dev_err(hdev->dev, "Engine core command is invalid\n");
2480
return -EINVAL;
2481
}
2482
2483
engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
2484
cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
2485
if (!cores)
2486
return -ENOMEM;
2487
2488
if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
2489
dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
2490
kfree(cores);
2491
return -EFAULT;
2492
}
2493
2494
rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
2495
kfree(cores);
2496
2497
return rc;
2498
}
2499
2500
static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
2501
u32 num_engines, enum hl_engine_command command)
2502
{
2503
struct hl_device *hdev = hpriv->hdev;
2504
u32 *engines, max_num_of_engines;
2505
void __user *engines_arr;
2506
int rc;
2507
2508
if (!hdev->asic_prop.supports_engine_modes)
2509
return -EPERM;
2510
2511
if (command >= HL_ENGINE_COMMAND_MAX) {
2512
dev_err(hdev->dev, "Engine command is invalid\n");
2513
return -EINVAL;
2514
}
2515
2516
max_num_of_engines = hdev->asic_prop.max_num_of_engines;
2517
if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
2518
max_num_of_engines = hdev->asic_prop.num_engine_cores;
2519
2520
if (!num_engines || num_engines > max_num_of_engines) {
2521
dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
2522
return -EINVAL;
2523
}
2524
2525
engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
2526
engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
2527
if (!engines)
2528
return -ENOMEM;
2529
2530
if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
2531
dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
2532
kfree(engines);
2533
return -EFAULT;
2534
}
2535
2536
rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
2537
kfree(engines);
2538
2539
return rc;
2540
}
2541
2542
static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
2543
{
2544
struct hl_device *hdev = hpriv->hdev;
2545
struct asic_fixed_properties *prop = &hdev->asic_prop;
2546
2547
if (!prop->hbw_flush_reg) {
2548
dev_dbg(hdev->dev, "HBW flush is not supported\n");
2549
return -EOPNOTSUPP;
2550
}
2551
2552
RREG32(prop->hbw_flush_reg);
2553
2554
return 0;
2555
}
2556
2557
int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
2558
{
2559
struct hl_fpriv *hpriv = file_priv->driver_priv;
2560
union hl_cs_args *args = data;
2561
enum hl_cs_type cs_type = 0;
2562
u64 cs_seq = ULONG_MAX;
2563
void __user *chunks;
2564
u32 num_chunks, flags, timeout,
2565
signals_count = 0, sob_addr = 0, handle_id = 0;
2566
u16 sob_initial_count = 0;
2567
int rc;
2568
2569
rc = hl_cs_sanity_checks(hpriv, args);
2570
if (rc)
2571
goto out;
2572
2573
rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
2574
if (rc)
2575
goto out;
2576
2577
cs_type = hl_cs_get_cs_type(args->in.cs_flags &
2578
~HL_CS_FLAGS_FORCE_RESTORE);
2579
chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
2580
num_chunks = args->in.num_chunks_execute;
2581
flags = args->in.cs_flags;
2582
2583
/* In case this is a staged CS, user should supply the CS sequence */
2584
if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
2585
!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
2586
cs_seq = args->in.seq;
2587
2588
timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
2589
? secs_to_jiffies(args->in.timeout)
2590
: hpriv->hdev->timeout_jiffies;
2591
2592
switch (cs_type) {
2593
case CS_TYPE_SIGNAL:
2594
case CS_TYPE_WAIT:
2595
case CS_TYPE_COLLECTIVE_WAIT:
2596
rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
2597
&cs_seq, args->in.cs_flags, timeout,
2598
&sob_addr, &sob_initial_count);
2599
break;
2600
case CS_RESERVE_SIGNALS:
2601
rc = cs_ioctl_reserve_signals(hpriv,
2602
args->in.encaps_signals_q_idx,
2603
args->in.encaps_signals_count,
2604
&handle_id, &sob_addr, &signals_count);
2605
break;
2606
case CS_UNRESERVE_SIGNALS:
2607
rc = cs_ioctl_unreserve_signals(hpriv,
2608
args->in.encaps_sig_handle_id);
2609
break;
2610
case CS_TYPE_ENGINE_CORE:
2611
rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
2612
args->in.num_engine_cores, args->in.core_command);
2613
break;
2614
case CS_TYPE_ENGINES:
2615
rc = cs_ioctl_engines(hpriv, args->in.engines,
2616
args->in.num_engines, args->in.engine_command);
2617
break;
2618
case CS_TYPE_FLUSH_PCI_HBW_WRITES:
2619
rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
2620
break;
2621
default:
2622
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
2623
args->in.cs_flags,
2624
args->in.encaps_sig_handle_id,
2625
timeout, &sob_initial_count);
2626
break;
2627
}
2628
out:
2629
if (rc != -EAGAIN) {
2630
memset(args, 0, sizeof(*args));
2631
2632
switch (cs_type) {
2633
case CS_RESERVE_SIGNALS:
2634
args->out.handle_id = handle_id;
2635
args->out.sob_base_addr_offset = sob_addr;
2636
args->out.count = signals_count;
2637
break;
2638
case CS_TYPE_SIGNAL:
2639
args->out.sob_base_addr_offset = sob_addr;
2640
args->out.sob_count_before_submission = sob_initial_count;
2641
args->out.seq = cs_seq;
2642
break;
2643
case CS_TYPE_DEFAULT:
2644
args->out.sob_count_before_submission = sob_initial_count;
2645
args->out.seq = cs_seq;
2646
break;
2647
default:
2648
args->out.seq = cs_seq;
2649
break;
2650
}
2651
2652
args->out.status = rc;
2653
}
2654
2655
return rc;
2656
}
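/*
 * Summary (for reference, derived from the switch statement above) of the
 * output union filled on a non -EAGAIN return:
 *
 *   CS_RESERVE_SIGNALS: out.handle_id, out.sob_base_addr_offset, out.count
 *   CS_TYPE_SIGNAL:     out.sob_base_addr_offset,
 *                       out.sob_count_before_submission, out.seq
 *   CS_TYPE_DEFAULT:    out.sob_count_before_submission, out.seq
 *   all other types:    out.seq only
 *
 * out.status always carries the return code of the selected handler.
 */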
2657
2658
static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
2659
enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp)
2660
{
2661
struct hl_device *hdev = ctx->hdev;
2662
ktime_t timestamp_kt;
2663
long completion_rc;
2664
int rc = 0, error;
2665
2666
if (IS_ERR(fence)) {
2667
rc = PTR_ERR(fence);
2668
if (rc == -EINVAL)
2669
dev_notice_ratelimited(hdev->dev,
2670
"Can't wait on CS %llu because current CS is at seq %llu\n",
2671
seq, ctx->cs_sequence);
2672
return rc;
2673
}
2674
2675
if (!fence) {
2676
if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) {
2677
dev_dbg(hdev->dev,
2678
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
2679
seq, ctx->cs_sequence);
2680
*status = CS_WAIT_STATUS_GONE;
2681
return 0;
2682
}
2683
2684
completion_rc = 1;
2685
goto report_results;
2686
}
2687
2688
if (!timeout_us) {
2689
completion_rc = completion_done(&fence->completion);
2690
} else {
2691
unsigned long timeout;
2692
2693
timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
2694
timeout_us : usecs_to_jiffies(timeout_us);
2695
completion_rc =
2696
wait_for_completion_interruptible_timeout(
2697
&fence->completion, timeout);
2698
}
2699
2700
error = fence->error;
2701
timestamp_kt = fence->timestamp;
2702
2703
report_results:
2704
if (completion_rc > 0) {
2705
*status = CS_WAIT_STATUS_COMPLETED;
2706
if (timestamp)
2707
*timestamp = ktime_to_ns(timestamp_kt);
2708
} else {
2709
*status = CS_WAIT_STATUS_BUSY;
2710
}
2711
2712
if (completion_rc == -ERESTARTSYS)
2713
rc = completion_rc;
2714
else if (error == -ETIMEDOUT || error == -EIO)
2715
rc = error;
2716
2717
return rc;
2718
}
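/*
 * Note (derived from the logic above): the caller of hl_wait_for_fence()
 * gets its result through two channels:
 *   - *status: CS_WAIT_STATUS_GONE when the fence is gone and the outcome
 *     store no longer holds the seq, CS_WAIT_STATUS_COMPLETED when the
 *     completion was reached (completion_rc > 0), CS_WAIT_STATUS_BUSY
 *     otherwise;
 *   - the return value: -ERESTARTSYS if the wait was interrupted by a
 *     signal, the fence error when it is -ETIMEDOUT or -EIO, and 0 in
 *     the remaining cases.
 */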
2719
2720
/*
 * hl_cs_poll_fences - iterate CS fences to check for CS completion
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 *
 * The function iterates over all CS sequences in the list and sets a bit in
 * completion_bitmap for each completed CS.
 * While iterating, the function ORs the stream map of each fence in the fence
 * array into the completion QID stream map, to be used by CSs to perform
 * completion to the multi-CS context.
 * This function shall be called after taking a context ref.
 */
static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
2736
{
2737
struct hl_fence **fence_ptr = mcs_data->fence_arr;
2738
struct hl_device *hdev = mcs_data->ctx->hdev;
2739
int i, rc, arr_len = mcs_data->arr_len;
2740
u64 *seq_arr = mcs_data->seq_arr;
2741
ktime_t max_ktime, first_cs_time;
2742
enum hl_cs_wait_status status;
2743
2744
memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
2745
2746
/* get all fences under the same lock */
2747
rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
2748
if (rc)
2749
return rc;
2750
2751
/*
 * Re-initialize the completion here to handle 2 possible cases:
 * 1. A CS completes the multi-CS prior to clearing the completion, in which
 *    case the fence iteration is guaranteed to catch the CS completion.
 * 2. The completion occurs after re-init of the completion,
 *    in which case we will wake up immediately in wait_for_completion.
 */
2758
reinit_completion(&mcs_compl->completion);
2759
2760
/*
 * Set to the maximum time to verify the timestamp is valid: if this
 * value is still set at the end, no timestamp was updated.
 */
2764
max_ktime = ktime_set(KTIME_SEC_MAX, 0);
2765
first_cs_time = max_ktime;
2766
2767
for (i = 0; i < arr_len; i++, fence_ptr++) {
2768
struct hl_fence *fence = *fence_ptr;
2769
2770
/*
 * In order to prevent the case where we wait until timeout even though a CS
 * associated with the multi-CS actually completed, we do things in the below order:
 * 1. for each fence, set its QID map in the multi-CS completion QID map. This way
 *    any CS can, potentially, complete the multi CS for the specific QID (note
 *    that once completion is initialized, calling complete* and then wait on the
 *    completion will cause it to return at once)
 * 2. only after allowing multi-CS completion for the specific QID, we check whether
 *    the specific CS already completed (and thus the wait for completion part will
 *    be skipped). If the CS has not completed, it is guaranteed that the completing
 *    CS will wake up the completion.
 */
2782
if (fence)
2783
mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
2784
2785
/*
2786
* function won't sleep as it is called with timeout 0 (i.e.
2787
* poll the fence)
2788
*/
2789
rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL);
2790
if (rc) {
2791
dev_err(hdev->dev,
2792
"wait_for_fence error :%d for CS seq %llu\n",
2793
rc, seq_arr[i]);
2794
break;
2795
}
2796
2797
switch (status) {
2798
case CS_WAIT_STATUS_BUSY:
2799
/* CS has not finished yet, QID to wait on is already stored */
2800
break;
2801
case CS_WAIT_STATUS_COMPLETED:
2802
/*
 * Use mcs_handling_done to avoid the possibility of mcs_data being
 * returned to the user, indicating the CS completed, before the CS has
 * finished all of its mcs handling; this avoids a race the next time
 * the user waits for mcs.
 * Note: when reaching this case the fence is definitely not NULL,
 * but the NULL check was added to satisfy static analysis.
 */
2810
if (fence && !fence->mcs_handling_done) {
2811
/*
2812
* in case multi CS is completed but MCS handling not done
2813
* we "complete" the multi CS to prevent it from waiting
2814
* until time-out and the "multi-CS handling done" will have
2815
* another chance at the next iteration
2816
*/
2817
complete_all(&mcs_compl->completion);
2818
break;
2819
}
2820
2821
mcs_data->completion_bitmap |= BIT(i);
2822
/*
2823
* For all completed CSs we take the earliest timestamp.
2824
* For this we have to validate that the timestamp is
2825
* earliest of all timestamps so far.
2826
*/
2827
if (fence && mcs_data->update_ts &&
2828
(ktime_compare(fence->timestamp, first_cs_time) < 0))
2829
first_cs_time = fence->timestamp;
2830
break;
2831
case CS_WAIT_STATUS_GONE:
2832
mcs_data->update_ts = false;
2833
mcs_data->gone_cs = true;
2834
/*
 * It is possible to get old sequence numbers from the user
 * which relate to already-completed CSs whose fences are
 * already gone. In this case, the CS is set as completed but
 * there is no need to consider its QID for mcs completion.
 */
2840
mcs_data->completion_bitmap |= BIT(i);
2841
break;
2842
default:
2843
dev_err(hdev->dev, "Invalid fence status\n");
2844
rc = -EINVAL;
2845
break;
2846
}
2847
2848
}
2849
2850
hl_fences_put(mcs_data->fence_arr, arr_len);
2851
2852
if (mcs_data->update_ts &&
2853
(ktime_compare(first_cs_time, max_ktime) != 0))
2854
mcs_data->timestamp = ktime_to_ns(first_cs_time);
2855
2856
return rc;
2857
}
2858
2859
static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
2860
enum hl_cs_wait_status *status, s64 *timestamp)
2861
{
2862
struct hl_fence *fence;
2863
int rc = 0;
2864
2865
if (timestamp)
2866
*timestamp = 0;
2867
2868
hl_ctx_get(ctx);
2869
2870
fence = hl_ctx_get_fence(ctx, seq);
2871
2872
rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
2873
hl_fence_put(fence);
2874
hl_ctx_put(ctx);
2875
2876
return rc;
2877
}
2878
2879
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
2880
{
2881
if (usecs <= U32_MAX)
2882
return usecs_to_jiffies(usecs);
2883
2884
/*
 * If the value in nanoseconds would overflow 64 bits, use the largest
 * 64-bit value.
 */
2888
if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
2889
return nsecs_to_jiffies(U64_MAX);
2890
2891
return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
2892
}
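/*
 * Worked example (comment only): a 5-second timeout is 5,000,000 us, which
 * fits in 32 bits and takes the usecs_to_jiffies() path. A timeout above
 * U32_MAX us (roughly 71.6 minutes) is first converted to nanoseconds,
 * clamped to U64_MAX when usecs * NSEC_PER_USEC would overflow 64 bits,
 * and then handed to nsecs_to_jiffies().
 */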
2893
2894
/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
 * @hdev: pointer to habanalabs device structure
 *
 * @return valid completion struct pointer on success, otherwise error pointer
 *
 * Up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver.
 * The function gets the first available completion (by marking it "used")
 * and initializes its values.
 */
2907
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
2908
{
2909
struct multi_cs_completion *mcs_compl;
2910
int i;
2911
2912
/* find free multi_cs completion structure */
2913
for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2914
mcs_compl = &hdev->multi_cs_completion[i];
2915
spin_lock(&mcs_compl->lock);
2916
if (!mcs_compl->used) {
2917
mcs_compl->used = 1;
2918
mcs_compl->timestamp = 0;
2919
/*
 * Init the QID map to 0 to avoid completion by CSs. The actual QID map
 * of the multi-CS CSs will be set incrementally at a later stage.
 */
2923
mcs_compl->stream_master_qid_map = 0;
2924
spin_unlock(&mcs_compl->lock);
2925
break;
2926
}
2927
spin_unlock(&mcs_compl->lock);
2928
}
2929
2930
if (i == MULTI_CS_MAX_USER_CTX) {
2931
dev_err(hdev->dev, "no available multi-CS completion structure\n");
2932
return ERR_PTR(-ENOMEM);
2933
}
2934
return mcs_compl;
2935
}
2936
2937
/*
2938
* hl_wait_multi_cs_completion_fini - return completion structure and set as
2939
* unused
2940
*
2941
* @mcs_compl: pointer to the completion structure
2942
*/
2943
static void hl_wait_multi_cs_completion_fini(
2944
struct multi_cs_completion *mcs_compl)
2945
{
2946
/*
2947
* free completion structure, do it under lock to be in-sync with the
2948
* thread that signals completion
2949
*/
2950
spin_lock(&mcs_compl->lock);
2951
mcs_compl->used = 0;
2952
spin_unlock(&mcs_compl->lock);
2953
}
2954
2955
/*
 * hl_wait_multi_cs_completion - wait for first CS to complete
 *
 * @mcs_data: multi-CS internal data
 * @mcs_compl: multi-CS completion structure
 *
 * @return 0 on success, otherwise non 0 error code
 */
2962
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
2963
struct multi_cs_completion *mcs_compl)
2964
{
2965
long completion_rc;
2966
2967
completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
2968
mcs_data->timeout_jiffies);
2969
2970
/* update timestamp */
2971
if (completion_rc > 0)
2972
mcs_data->timestamp = mcs_compl->timestamp;
2973
2974
if (completion_rc == -ERESTARTSYS)
2975
return completion_rc;
2976
2977
mcs_data->wait_status = completion_rc;
2978
2979
return 0;
2980
}
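/*
 * Note: wait_for_completion_interruptible_timeout() returns 0 on timeout
 * and the number of remaining jiffies otherwise, so mcs_data->wait_status
 * doubles as both a "timed out" indication (0) and, in the caller's retry
 * loop below, as the reduced timeout for the next wait iteration.
 */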
2981
2982
/*
2983
* hl_multi_cs_completion_init - init array of multi-CS completion structures
2984
*
2985
* @hdev: pointer to habanalabs device structure
2986
*/
2987
void hl_multi_cs_completion_init(struct hl_device *hdev)
2988
{
2989
struct multi_cs_completion *mcs_cmpl;
2990
int i;
2991
2992
for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
2993
mcs_cmpl = &hdev->multi_cs_completion[i];
2994
mcs_cmpl->used = 0;
2995
spin_lock_init(&mcs_cmpl->lock);
2996
init_completion(&mcs_cmpl->completion);
2997
}
2998
}
2999
3000
/*
3001
* hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3002
*
3003
* @hpriv: pointer to the private data of the fd
3004
* @data: pointer to multi-CS wait ioctl in/out args
3005
*
3006
*/
3007
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3008
{
3009
struct multi_cs_completion *mcs_compl;
3010
struct hl_device *hdev = hpriv->hdev;
3011
struct multi_cs_data mcs_data = {};
3012
union hl_wait_cs_args *args = data;
3013
struct hl_ctx *ctx = hpriv->ctx;
3014
struct hl_fence **fence_arr;
3015
void __user *seq_arr;
3016
u32 size_to_copy;
3017
u64 *cs_seq_arr;
3018
u8 seq_arr_len;
3019
int rc, i;
3020
3021
for (i = 0 ; i < sizeof(args->in.pad) ; i++)
3022
if (args->in.pad[i]) {
3023
dev_dbg(hdev->dev, "Padding bytes must be 0\n");
3024
return -EINVAL;
3025
}
3026
3027
if (!hdev->supports_wait_for_multi_cs) {
3028
dev_err(hdev->dev, "Wait for multi CS is not supported\n");
3029
return -EPERM;
3030
}
3031
3032
seq_arr_len = args->in.seq_arr_len;
3033
3034
if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
3035
dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
3036
HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
3037
return -EINVAL;
3038
}
3039
3040
/* allocate memory for sequence array */
3041
cs_seq_arr =
3042
kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
3043
if (!cs_seq_arr)
3044
return -ENOMEM;
3045
3046
/* copy CS sequence array from user */
3047
seq_arr = (void __user *) (uintptr_t) args->in.seq;
3048
size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
3049
if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
3050
dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
3051
rc = -EFAULT;
3052
goto free_seq_arr;
3053
}
3054
3055
/* allocate array for the fences */
3056
fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
3057
if (!fence_arr) {
3058
rc = -ENOMEM;
3059
goto free_seq_arr;
3060
}
3061
3062
/* initialize the multi-CS internal data */
3063
mcs_data.ctx = ctx;
3064
mcs_data.seq_arr = cs_seq_arr;
3065
mcs_data.fence_arr = fence_arr;
3066
mcs_data.arr_len = seq_arr_len;
3067
3068
hl_ctx_get(ctx);
3069
3070
/* wait (with timeout) for the first CS to be completed */
3071
mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
3072
mcs_compl = hl_wait_multi_cs_completion_init(hdev);
3073
if (IS_ERR(mcs_compl)) {
3074
rc = PTR_ERR(mcs_compl);
3075
goto put_ctx;
3076
}
3077
3078
/* poll all CS fences, extract timestamp */
3079
mcs_data.update_ts = true;
3080
rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3081
/*
3082
* skip wait for CS completion when one of the below is true:
3083
* - an error on the poll function
3084
* - one or more CS in the list completed
3085
* - the user called ioctl with timeout 0
3086
*/
3087
if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
3088
goto completion_fini;
3089
3090
while (true) {
3091
rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
3092
if (rc || (mcs_data.wait_status == 0))
3093
break;
3094
3095
/*
3096
* poll fences once again to update the CS map.
3097
* no timestamp should be updated this time.
3098
*/
3099
mcs_data.update_ts = false;
3100
rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
3101
3102
if (rc || mcs_data.completion_bitmap)
3103
break;
3104
3105
/*
 * If hl_wait_multi_cs_completion returned before the timeout (i.e.
 * it got a completion), it was either completed by a CS in the multi-CS list
 * (in which case the indication will be a non-empty completion_bitmap), or it
 * was completed by a CS submitted to one of the shared stream masters but
 * not in the multi-CS list (in which case we should wait again, but modify
 * the timeout and set the timestamp to zero to let a CS related to the current
 * multi-CS set a new, relevant, timestamp).
 */
3114
mcs_data.timeout_jiffies = mcs_data.wait_status;
3115
mcs_compl->timestamp = 0;
3116
}
3117
3118
completion_fini:
3119
hl_wait_multi_cs_completion_fini(mcs_compl);
3120
3121
put_ctx:
3122
hl_ctx_put(ctx);
3123
kfree(fence_arr);
3124
3125
free_seq_arr:
3126
kfree(cs_seq_arr);
3127
3128
if (rc == -ERESTARTSYS) {
3129
dev_err_ratelimited(hdev->dev,
3130
"user process got signal while waiting for Multi-CS\n");
3131
rc = -EINTR;
3132
}
3133
3134
if (rc)
3135
return rc;
3136
3137
/* update output args */
3138
memset(args, 0, sizeof(*args));
3139
3140
if (mcs_data.completion_bitmap) {
3141
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3142
args->out.cs_completion_map = mcs_data.completion_bitmap;
3143
3144
/* if timestamp not 0- it's valid */
3145
if (mcs_data.timestamp) {
3146
args->out.timestamp_nsec = mcs_data.timestamp;
3147
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3148
}
3149
3150
/* update if some CS was gone */
3151
if (!mcs_data.timestamp)
3152
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3153
} else {
3154
args->out.status = HL_WAIT_CS_STATUS_BUSY;
3155
}
3156
3157
return 0;
3158
}
3159
3160
static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3161
{
3162
struct hl_device *hdev = hpriv->hdev;
3163
union hl_wait_cs_args *args = data;
3164
enum hl_cs_wait_status status;
3165
u64 seq = args->in.seq;
3166
s64 timestamp;
3167
int rc;
3168
3169
rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp);
3170
3171
if (rc == -ERESTARTSYS) {
3172
dev_err_ratelimited(hdev->dev,
3173
"user process got signal while waiting for CS handle %llu\n",
3174
seq);
3175
return -EINTR;
3176
}
3177
3178
memset(args, 0, sizeof(*args));
3179
3180
if (rc) {
3181
if (rc == -ETIMEDOUT) {
3182
dev_err_ratelimited(hdev->dev,
3183
"CS %llu has timed-out while user process is waiting for it\n",
3184
seq);
3185
args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
3186
} else if (rc == -EIO) {
3187
dev_err_ratelimited(hdev->dev,
3188
"CS %llu has been aborted while user process is waiting for it\n",
3189
seq);
3190
args->out.status = HL_WAIT_CS_STATUS_ABORTED;
3191
}
3192
return rc;
3193
}
3194
3195
if (timestamp) {
3196
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3197
args->out.timestamp_nsec = timestamp;
3198
}
3199
3200
switch (status) {
3201
case CS_WAIT_STATUS_GONE:
3202
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
3203
fallthrough;
3204
case CS_WAIT_STATUS_COMPLETED:
3205
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
3206
break;
3207
case CS_WAIT_STATUS_BUSY:
3208
default:
3209
args->out.status = HL_WAIT_CS_STATUS_BUSY;
3210
break;
3211
}
3212
3213
return 0;
3214
}
3215
3216
static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
3217
struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
3218
{
3219
record->ts_reg_info.cq_cb = cq_cb;
3220
record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
3221
record->cq_target_value = target_value;
3222
}
3223
3224
static int validate_and_get_ts_record(struct device *dev,
3225
struct hl_ts_buff *ts_buff, u64 ts_offset,
3226
struct hl_user_pending_interrupt **req_event_record)
3227
{
3228
struct hl_user_pending_interrupt *ts_cb_last;
3229
3230
*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3231
ts_offset;
3232
ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
3233
(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
3234
3235
/* Validate ts_offset not exceeding last max */
3236
if (*req_event_record >= ts_cb_last) {
3237
dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
3238
ts_offset, (u64)(uintptr_t)ts_cb_last);
3239
return -EINVAL;
3240
}
3241
3242
return 0;
3243
}
3244
3245
static void unregister_timestamp_node(struct hl_device *hdev,
3246
struct hl_user_pending_interrupt *record, bool need_lock)
3247
{
3248
struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
3249
bool ts_rec_found = false;
3250
unsigned long flags;
3251
3252
if (need_lock)
3253
spin_lock_irqsave(&interrupt->ts_list_lock, flags);
3254
3255
if (record->ts_reg_info.in_use) {
3256
record->ts_reg_info.in_use = false;
3257
list_del(&record->list_node);
3258
ts_rec_found = true;
3259
}
3260
3261
if (need_lock)
3262
spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
3263
3264
/* Put refcounts that were taken when we registered the event */
3265
if (ts_rec_found) {
3266
hl_mmap_mem_buf_put(record->ts_reg_info.buf);
3267
hl_cb_put(record->ts_reg_info.cq_cb);
3268
}
3269
}
3270
3271
static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
3272
struct wait_interrupt_data *data, unsigned long *flags,
3273
struct hl_user_pending_interrupt **pend)
3274
{
3275
struct hl_user_pending_interrupt *req_offset_record;
3276
struct hl_ts_buff *ts_buff = data->buf->private;
3277
bool need_lock = false;
3278
int rc;
3279
3280
rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
3281
&req_offset_record);
3282
if (rc)
3283
return rc;
3284
3285
/* In case the node is already registered, we need to unregister it first, then re-use it */
if (req_offset_record->ts_reg_info.in_use) {
/*
 * Since the interrupt here can be different than the one the node is currently
 * registered on, and we don't want to lock two lists while doing the unregister,
 * unlock the new interrupt's list here and acquire the lock again once done.
 */
3292
if (data->interrupt->interrupt_id !=
3293
req_offset_record->ts_reg_info.interrupt->interrupt_id) {
3294
3295
need_lock = true;
3296
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
3297
}
3298
3299
unregister_timestamp_node(hdev, req_offset_record, need_lock);
3300
3301
if (need_lock)
3302
spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
3303
}
3304
3305
/* Fill up the new registration node info and add it to the list */
3306
req_offset_record->ts_reg_info.in_use = true;
3307
req_offset_record->ts_reg_info.buf = data->buf;
3308
req_offset_record->ts_reg_info.timestamp_kernel_addr =
3309
(u64 *) ts_buff->user_buff_address + data->ts_offset;
3310
req_offset_record->ts_reg_info.interrupt = data->interrupt;
3311
set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
3312
data->target_value);
3313
3314
*pend = req_offset_record;
3315
3316
return rc;
3317
}
3318
3319
static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3320
struct wait_interrupt_data *data,
3321
u32 *status, u64 *timestamp)
3322
{
3323
struct hl_user_pending_interrupt *pend;
3324
unsigned long flags;
3325
int rc = 0;
3326
3327
hl_ctx_get(ctx);
3328
3329
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3330
if (!data->cq_cb) {
3331
rc = -EINVAL;
3332
goto put_ctx;
3333
}
3334
3335
/* Validate the cq offset */
3336
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3337
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3338
rc = -EINVAL;
3339
goto put_cq_cb;
3340
}
3341
3342
data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
3343
if (!data->buf) {
3344
rc = -EINVAL;
3345
goto put_cq_cb;
3346
}
3347
3348
spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
3349
3350
/* get ts buffer record */
3351
rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
3352
if (rc) {
3353
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3354
goto put_ts_buff;
3355
}
3356
3357
/* We check for completion value as interrupt could have been received
3358
* before we add the timestamp node to the ts list.
3359
*/
3360
if (*pend->cq_kernel_addr >= data->target_value) {
3361
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3362
3363
pend->ts_reg_info.in_use = 0;
3364
*status = HL_WAIT_CS_STATUS_COMPLETED;
3365
*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
3366
3367
goto put_ts_buff;
3368
}
3369
3370
list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
3371
spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
3372
3373
rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
3374
3375
hl_ctx_put(ctx);
3376
3377
return rc;
3378
3379
put_ts_buff:
3380
hl_mmap_mem_buf_put(data->buf);
3381
put_cq_cb:
3382
hl_cb_put(data->cq_cb);
3383
put_ctx:
3384
hl_ctx_put(ctx);
3385
3386
return rc;
3387
}
3388
3389
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3390
struct wait_interrupt_data *data,
3391
u32 *status, u64 *timestamp)
3392
{
3393
struct hl_user_pending_interrupt *pend;
3394
unsigned long timeout, flags;
3395
long completion_rc;
3396
int rc = 0;
3397
3398
timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
3399
3400
hl_ctx_get(ctx);
3401
3402
data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
3403
if (!data->cq_cb) {
3404
rc = -EINVAL;
3405
goto put_ctx;
3406
}
3407
3408
/* Validate the cq offset */
3409
if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
3410
((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
3411
rc = -EINVAL;
3412
goto put_cq_cb;
3413
}
3414
3415
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3416
if (!pend) {
3417
rc = -ENOMEM;
3418
goto put_cq_cb;
3419
}
3420
3421
hl_fence_init(&pend->fence, ULONG_MAX);
3422
pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
3423
pend->cq_target_value = data->target_value;
3424
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3425
3426
3427
/* We check for completion value as interrupt could have been received
3428
* before we add the wait node to the wait list.
3429
*/
3430
if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
3431
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3432
3433
if (*pend->cq_kernel_addr >= data->target_value)
3434
*status = HL_WAIT_CS_STATUS_COMPLETED;
3435
else
3436
*status = HL_WAIT_CS_STATUS_BUSY;
3437
3438
pend->fence.timestamp = ktime_get();
3439
goto set_timestamp;
3440
}
3441
3442
/* Add the pending user interrupt to the relevant list for the interrupt
 * handler to monitor.
 * Note that we cannot keep the list sorted by target value in order to
 * shorten the list traversal, since the same list can hold nodes for
 * different cq counter handles.
 */
3448
list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
3449
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3450
3451
/* Wait for interrupt handler to signal completion */
3452
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3453
timeout);
3454
if (completion_rc > 0) {
3455
if (pend->fence.error == -EIO) {
3456
dev_err_ratelimited(hdev->dev,
3457
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3458
pend->fence.error);
3459
rc = -EIO;
3460
*status = HL_WAIT_CS_STATUS_ABORTED;
3461
} else {
3462
*status = HL_WAIT_CS_STATUS_COMPLETED;
3463
}
3464
} else {
3465
if (completion_rc == -ERESTARTSYS) {
3466
dev_err_ratelimited(hdev->dev,
3467
"user process got signal while waiting for interrupt ID %d\n",
3468
data->interrupt->interrupt_id);
3469
rc = -EINTR;
3470
*status = HL_WAIT_CS_STATUS_ABORTED;
3471
} else {
3472
/* The wait has timed-out. We don't know anything beyond that
3473
* because the workload was not submitted through the driver.
3474
* Therefore, from driver's perspective, the workload is still
3475
* executing.
3476
*/
3477
rc = 0;
3478
*status = HL_WAIT_CS_STATUS_BUSY;
3479
}
3480
}
3481
3482
/*
 * We keep the removal of the node from the list here, and not in the irq
 * handler, for the completion-timeout case. If it is a registration
 * for a ts record, the node will be deleted in the irq handler after
 * we reach the target value.
 */
3488
spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
3489
list_del(&pend->list_node);
3490
spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
3491
3492
set_timestamp:
3493
*timestamp = ktime_to_ns(pend->fence.timestamp);
3494
kfree(pend);
3495
hl_cb_put(data->cq_cb);
3496
hl_ctx_put(ctx);
3497
3498
return rc;
3499
3500
put_cq_cb:
3501
hl_cb_put(data->cq_cb);
3502
put_ctx:
3503
hl_ctx_put(ctx);
3504
3505
return rc;
3506
}
3507
3508
static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
3509
u64 timeout_us, u64 user_address,
3510
u64 target_value, struct hl_user_interrupt *interrupt,
3511
u32 *status,
3512
u64 *timestamp)
3513
{
3514
struct hl_user_pending_interrupt *pend;
3515
unsigned long timeout, flags;
3516
u64 completion_value;
3517
long completion_rc;
3518
int rc = 0;
3519
3520
timeout = hl_usecs64_to_jiffies(timeout_us);
3521
3522
hl_ctx_get(ctx);
3523
3524
pend = kzalloc(sizeof(*pend), GFP_KERNEL);
3525
if (!pend) {
3526
hl_ctx_put(ctx);
3527
return -ENOMEM;
3528
}
3529
3530
hl_fence_init(&pend->fence, ULONG_MAX);
3531
3532
/* Add pending user interrupt to relevant list for the interrupt
3533
* handler to monitor
3534
*/
3535
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3536
list_add_tail(&pend->list_node, &interrupt->wait_list_head);
3537
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3538
3539
/* We check for completion value as interrupt could have been received
3540
* before we added the node to the wait list
3541
*/
3542
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3543
dev_err(hdev->dev, "Failed to copy completion value from user\n");
3544
rc = -EFAULT;
3545
goto remove_pending_user_interrupt;
3546
}
3547
3548
if (completion_value >= target_value) {
3549
*status = HL_WAIT_CS_STATUS_COMPLETED;
3550
/* There was no interrupt, we assume the completion is now. */
3551
pend->fence.timestamp = ktime_get();
3552
} else {
3553
*status = HL_WAIT_CS_STATUS_BUSY;
3554
}
3555
3556
if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
3557
goto remove_pending_user_interrupt;
3558
3559
wait_again:
3560
/* Wait for interrupt handler to signal completion */
3561
completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
3562
timeout);
3563
3564
/* If timeout did not expire we need to perform the comparison.
3565
* If comparison fails, keep waiting until timeout expires
3566
*/
3567
if (completion_rc > 0) {
3568
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3569
/* reinit_completion must be called before we check for user
3570
* completion value, otherwise, if interrupt is received after
3571
* the comparison and before the next wait_for_completion,
3572
* we will reach timeout and fail
3573
*/
3574
reinit_completion(&pend->fence.completion);
3575
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3576
3577
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
3578
dev_err(hdev->dev, "Failed to copy completion value from user\n");
3579
rc = -EFAULT;
3580
3581
goto remove_pending_user_interrupt;
3582
}
3583
3584
if (completion_value >= target_value) {
3585
*status = HL_WAIT_CS_STATUS_COMPLETED;
3586
} else if (pend->fence.error) {
3587
dev_err_ratelimited(hdev->dev,
3588
"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
3589
pend->fence.error);
3590
/* set the command completion status as ABORTED */
3591
*status = HL_WAIT_CS_STATUS_ABORTED;
3592
} else {
3593
timeout = completion_rc;
3594
goto wait_again;
3595
}
3596
} else if (completion_rc == -ERESTARTSYS) {
3597
dev_err_ratelimited(hdev->dev,
3598
"user process got signal while waiting for interrupt ID %d\n",
3599
interrupt->interrupt_id);
3600
rc = -EINTR;
3601
} else {
3602
/* The wait has timed-out. We don't know anything beyond that
3603
* because the workload wasn't submitted through the driver.
3604
* Therefore, from driver's perspective, the workload is still
3605
* executing.
3606
*/
3607
rc = 0;
3608
*status = HL_WAIT_CS_STATUS_BUSY;
3609
}
3610
3611
remove_pending_user_interrupt:
3612
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
3613
list_del(&pend->list_node);
3614
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
3615
3616
*timestamp = ktime_to_ns(pend->fence.timestamp);
3617
3618
kfree(pend);
3619
hl_ctx_put(ctx);
3620
3621
return rc;
3622
}
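/*
 * Note: the two wait variants above differ mainly in where the completion
 * value lives. _hl_interrupt_wait_ioctl() reads it from a kernel-mapped CQ
 * counter buffer (cq_kernel_addr), while _hl_interrupt_wait_ioctl_user_addr()
 * has to copy_from_user() the value on every check, which is why it re-reads
 * and re-compares after each wakeup before deciding between completed,
 * aborted and "wait again".
 */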
3623
3624
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
3625
{
3626
u16 interrupt_id, first_interrupt, last_interrupt;
3627
struct hl_device *hdev = hpriv->hdev;
3628
struct asic_fixed_properties *prop;
3629
struct hl_user_interrupt *interrupt;
3630
union hl_wait_cs_args *args = data;
3631
u32 status = HL_WAIT_CS_STATUS_BUSY;
3632
u64 timestamp = 0;
3633
int rc, int_idx;
3634
3635
prop = &hdev->asic_prop;
3636
3637
if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) {
3638
dev_err(hdev->dev, "no user interrupts allowed");
3639
return -EPERM;
3640
}
3641
3642
interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
3643
3644
first_interrupt = prop->first_available_user_interrupt;
3645
last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1;
3646
3647
if (interrupt_id < prop->user_dec_intr_count) {
3648
3649
/* Check if the requested core is enabled */
3650
if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) {
3651
dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed",
3652
interrupt_id);
3653
return -EINVAL;
3654
}
3655
3656
interrupt = &hdev->user_interrupt[interrupt_id];
3657
3658
} else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) {
3659
3660
int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count;
3661
interrupt = &hdev->user_interrupt[int_idx];
3662
3663
} else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) {
3664
interrupt = &hdev->common_user_cq_interrupt;
3665
} else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) {
3666
interrupt = &hdev->common_decoder_interrupt;
3667
} else {
3668
dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3669
return -EINVAL;
3670
}
3671
3672
if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
3673
struct wait_interrupt_data wait_intr_data = {0};
3674
3675
wait_intr_data.interrupt = interrupt;
3676
wait_intr_data.mmg = &hpriv->mem_mgr;
3677
wait_intr_data.cq_handle = args->in.cq_counters_handle;
3678
wait_intr_data.cq_offset = args->in.cq_counters_offset;
3679
wait_intr_data.ts_handle = args->in.timestamp_handle;
3680
wait_intr_data.ts_offset = args->in.timestamp_offset;
3681
wait_intr_data.target_value = args->in.target;
3682
wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
3683
3684
if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
3685
/*
 * Allow only one registration at a time. This is needed in order to prevent
 * issues while handling the flow of re-use of the same offset.
 * Since the registration flow is protected only by the interrupt lock,
 * the re-use flow might request to move the ts node to another interrupt list,
 * and in such a case we're not protected.
 */
3692
mutex_lock(&hpriv->ctx->ts_reg_lock);
3693
3694
rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3695
&status, &timestamp);
3696
3697
mutex_unlock(&hpriv->ctx->ts_reg_lock);
3698
} else
3699
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
3700
&status, &timestamp);
3701
} else {
3702
rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3703
args->in.interrupt_timeout_us, args->in.addr,
3704
args->in.target, interrupt, &status,
3705
&timestamp);
3706
}
3707
3708
if (rc)
3709
return rc;
3710
3711
memset(args, 0, sizeof(*args));
3712
args->out.status = status;
3713
3714
if (timestamp) {
3715
args->out.timestamp_nsec = timestamp;
3716
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
3717
}
3718
3719
return 0;
3720
}
3721
3722
int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
3723
{
3724
struct hl_fpriv *hpriv = file_priv->driver_priv;
3725
struct hl_device *hdev = hpriv->hdev;
3726
union hl_wait_cs_args *args = data;
3727
u32 flags = args->in.flags;
3728
int rc;
3729
3730
/* If the device is not operational, or if an error has happened and user should release the
3731
* device, there is no point in waiting for any command submission or user interrupt.
3732
*/
3733
if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
3734
return -EBUSY;
3735
3736
if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
3737
rc = hl_interrupt_wait_ioctl(hpriv, data);
3738
else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
3739
rc = hl_multi_cs_wait_ioctl(hpriv, data);
3740
else
3741
rc = hl_cs_wait_ioctl(hpriv, data);
3742
3743
return rc;
3744
}
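/*
 * Dispatch summary for hl_wait_ioctl() above: HL_WAIT_CS_FLAGS_INTERRUPT
 * selects the user-interrupt wait paths, HL_WAIT_CS_FLAGS_MULTI_CS selects
 * the multi-CS wait, and the default is a plain wait on a single CS
 * sequence number.
 */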
3745
3746