GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <[email protected]>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX] = 1,
	[AMDGPU_HW_IP_COMPUTE] = 4,
	[AMDGPU_HW_IP_DMA] = 2,
	[AMDGPU_HW_IP_UVD] = 1,
	[AMDGPU_HW_IP_VCE] = 1,
	[AMDGPU_HW_IP_UVD_ENC] = 1,
	[AMDGPU_HW_IP_VCN_DEC] = 1,
	[AMDGPU_HW_IP_VCN_ENC] = 1,
	[AMDGPU_HW_IP_VCN_JPEG] = 1,
	[AMDGPU_HW_IP_VPE] = 1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		/* UNSET priority is not valid and we don't carry that
		 * around, but set it to NORMAL in the only place this
		 * function is called, amdgpu_ctx_ioctl().
		 */
		return false;
	}
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_LOW;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_LOW;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized the userspace-provided
	 * priority already; WARN if it does.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}

}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}
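
/* Map the context's effective priority to a hardware queue priority for the
 * given IP type, falling back to AMDGPU_RING_PRIO_DEFAULT when no scheduler
 * is configured at that level.
 */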
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled, it can't have spent any time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running, account for the time already spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}
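
/* Sum the hardware time of all fences currently tracked by the entity */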
static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}
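
/* Allocate and initialize the scheduler entity for one (hw_ip, ring) slot,
 * picking the scheduler set that matches the context's priority.
 */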
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!(adev)->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
					     &num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balancing if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}
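
/* Tear down an entity: accumulate the hardware time of its fences, drop the
 * fence references and free the entity itself.
 */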
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}
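
/* Initialize a freshly allocated context for the given file and priority */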
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &(fpriv->ctx_mgr);
	return 0;
}
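
/* Force the DPM performance level for this context; only one context may
 * hold the stable pstate at a time, otherwise -EBUSY is returned.
 */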
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}
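
/* Look up the scheduler entity for (hw_ip, instance, ring), creating it on
 * first use, and reject IP types, instances or rings that don't exist.
 */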
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;
	struct drm_sched_entity *ctx_entity;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
	r = drm_sched_entity_error(ctx_entity);
	if (r) {
		DRM_DEBUG("error entity %p\n", ctx_entity);
		return r;
	}

	*entity = ctx_entity;
	return 0;
}
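
/* Allocate a new context and publish it in the per-file handle IDR */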
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility, we need to accept ioctls with garbage
	 * in the priority field. Garbage values in the priority field result
	 * in the priority being set to NORMAL.
	 */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}
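
/* Store the fence in the ring-buffer slot for its sequence number and
 * account the hardware time of the fence it replaces.
 */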
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}
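
/* Look up a fence by sequence number; ~0ull means the most recent one.
 * Returns NULL when the requested fence has already left the ring buffer.
 */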
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}
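
/* Flush all entities of all contexts owned by this manager, returning the
 * remaining timeout.
 */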
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
		kref_put(&ctx->refcount, amdgpu_ctx_fini);
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	amdgpu_ctx_mgr_entity_fini(mgr);
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}
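
/* Accumulate the total hardware time per IP type: the time already folded
 * into mgr->time_spend plus the time of fences still held by live entities.
 */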
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can be that a ctx or a fence is
	 * destroyed just at the moment we try to account them. But that is ok
	 * since exactly that case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}