GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <[email protected]>
 */

#include <drm/drm_auth.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX] = 1,
	[AMDGPU_HW_IP_COMPUTE] = 4,
	[AMDGPU_HW_IP_DMA] = 2,
	[AMDGPU_HW_IP_UVD] = 1,
	[AMDGPU_HW_IP_VCE] = 1,
	[AMDGPU_HW_IP_UVD_ENC] = 1,
	[AMDGPU_HW_IP_VCN_DEC] = 1,
	[AMDGPU_HW_IP_VCN_ENC] = 1,
	[AMDGPU_HW_IP_VCN_JPEG] = 1,
	[AMDGPU_HW_IP_VPE] = 1,
};

bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_VERY_LOW:
	case AMDGPU_CTX_PRIORITY_LOW:
	case AMDGPU_CTX_PRIORITY_NORMAL:
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return true;
	default:
	case AMDGPU_CTX_PRIORITY_UNSET:
		/* UNSET priority is not valid and we don't carry that
		 * around, but set it to NORMAL in the only place this
		 * function is called, amdgpu_ctx_ioctl().
		 */
		return false;
	}
}

static enum drm_sched_priority
amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
{
	switch (ctx_prio) {
	case AMDGPU_CTX_PRIORITY_UNSET:
		pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_VERY_LOW:
		return DRM_SCHED_PRIORITY_LOW;

	case AMDGPU_CTX_PRIORITY_LOW:
		return DRM_SCHED_PRIORITY_LOW;

	case AMDGPU_CTX_PRIORITY_NORMAL:
		return DRM_SCHED_PRIORITY_NORMAL;

	case AMDGPU_CTX_PRIORITY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return DRM_SCHED_PRIORITY_HIGH;

	/* This should not happen as we sanitized the userspace-provided
	 * priority already; WARN if it does.
	 */
	default:
		WARN(1, "Invalid context priority %d\n", ctx_prio);
		return DRM_SCHED_PRIORITY_NORMAL;
	}

}

static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      int32_t priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
{
	switch (prio) {
	case AMDGPU_CTX_PRIORITY_HIGH:
		return AMDGPU_RING_PRIO_1;
	case AMDGPU_CTX_PRIORITY_VERY_HIGH:
		return AMDGPU_RING_PRIO_2;
	default:
		return AMDGPU_RING_PRIO_0;
	}
}
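
/* Map the context's effective priority to a hardware queue priority for the
 * given IP type, falling back to AMDGPU_RING_PRIO_DEFAULT when no scheduler
 * is configured at that level.
 */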
static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	int32_t ctx_prio;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	switch (hw_ip) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
		hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
		break;
	case AMDGPU_HW_IP_VCE:
	case AMDGPU_HW_IP_VCN_ENC:
		hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
		break;
	default:
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;
		break;
	}

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

/* Calculate the time spent on the hw */
static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
{
	struct drm_sched_fence *s_fence;

	if (!fence)
		return ns_to_ktime(0);

	/* When the fence is not even scheduled, it can't have spent any time */
	s_fence = to_drm_sched_fence(fence);
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
		return ns_to_ktime(0);

	/* When it is still running, account for the time already spent */
	if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
		return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);

	return ktime_sub(s_fence->finished.timestamp,
			 s_fence->scheduled.timestamp);
}
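
/* Sum the hardware time of all fences currently tracked by the entity */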
static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				      struct amdgpu_ctx_entity *centity)
{
	ktime_t res = ns_to_ktime(0);
	uint32_t i;

	spin_lock(&ctx->ring_lock);
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
	}
	spin_unlock(&ctx->ring_lock);
	return res;
}
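
/* Allocate and initialize the scheduler entity for one (hw_ip, ring) slot,
 * picking the scheduler set that matches the context's priority.
 */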
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				  const u32 ring)
{
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	struct amdgpu_device *adev = ctx->mgr->adev;
	struct amdgpu_ctx_entity *entity;
	enum drm_sched_priority drm_prio;
	unsigned int hw_prio, num_scheds;
	int32_t ctx_prio;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return -ENOMEM;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	entity->hw_ip = hw_ip;
	entity->sequence = 1;
	hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
	drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);

	if (!(adev)->xcp_mgr) {
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
	} else {
		struct amdgpu_fpriv *fpriv;

		fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
		r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
					     &num_scheds, &scheds);
		if (r)
			goto cleanup_entity;
	}

	/* disable load balancing if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	/* It's not an error if we fail to install the new entity */
	if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
		goto cleanup_entity;

	return 0;

cleanup_entity:
	drm_sched_entity_fini(&entity->entity);

error_free_entity:
	kfree(entity);

	return r;
}
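
/* Tear down an entity: accumulate the hardware time of its fences, drop the
 * fence references and free the entity itself.
 */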
static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
				      struct amdgpu_ctx_entity *entity)
{
	ktime_t res = ns_to_ktime(0);
	int i;

	if (!entity)
		return res;

	for (i = 0; i < amdgpu_sched_jobs; ++i) {
		res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
		dma_fence_put(entity->fences[i]);
	}

	amdgpu_xcp_release_sched(adev, entity);

	kfree(entity);
	return res;
}

static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
					u32 *stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level current_level;

	current_level = amdgpu_dpm_get_performance_level(adev);

	switch (current_level) {
	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
		break;
	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
		break;
	default:
		*stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		break;
	}
	return 0;
}
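
/* Initialize a freshly allocated context for the given file and priority */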
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
			   struct drm_file *filp, struct amdgpu_ctx *ctx)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	u32 current_stable_pstate;
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	kref_init(&ctx->refcount);
	ctx->mgr = mgr;
	spin_lock_init(&ctx->ring_lock);

	ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
	ctx->init_priority = priority;
	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r)
		return r;

	if (mgr->adev->pm.stable_pstate_ctx)
		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
	else
		ctx->stable_pstate = current_stable_pstate;

	ctx->ctx_mgr = &(fpriv->ctx_mgr);
	return 0;
}
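
/* Force the DPM performance level for this context; only one context may
 * hold the stable pstate at a time, otherwise -EBUSY is returned.
 */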
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
					u32 stable_pstate)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	enum amd_dpm_forced_level level;
	u32 current_stable_pstate;
	int r;

	mutex_lock(&adev->pm.stable_pstate_ctx_lock);
	if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
		r = -EBUSY;
		goto done;
	}

	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
	if (r || (stable_pstate == current_stable_pstate))
		goto done;

	switch (stable_pstate) {
	case AMDGPU_CTX_STABLE_PSTATE_NONE:
		level = AMD_DPM_FORCED_LEVEL_AUTO;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
		break;
	case AMDGPU_CTX_STABLE_PSTATE_PEAK:
		level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
		break;
	default:
		r = -EINVAL;
		goto done;
	}

	r = amdgpu_dpm_force_performance_level(adev, level);

	if (level == AMD_DPM_FORCED_LEVEL_AUTO)
		adev->pm.stable_pstate_ctx = NULL;
	else
		adev->pm.stable_pstate_ctx = ctx;
done:
	mutex_unlock(&adev->pm.stable_pstate_ctx_lock);

	return r;
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_ctx_mgr *mgr = ctx->mgr;
	struct amdgpu_device *adev = mgr->adev;
	unsigned i, j, idx;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			ktime_t spend;

			spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
			atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
	}

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
		drm_dev_exit(idx);
	}

	kfree(ctx);
}
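
/* Look up the scheduler entity for (hw_ip, instance, ring), creating it on
 * first use, and reject IP types, instances or rings that don't exist.
 */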
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;
	struct drm_sched_entity *ctx_entity;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	ctx_entity = &ctx->entities[hw_ip][ring]->entity;
	r = drm_sched_entity_error(ctx_entity);
	if (r) {
		DRM_DEBUG("error entity %p\n", ctx_entity);
		return r;
	}

	*entity = ctx_entity;
	return 0;
}
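
/* Allocate a new context and publish it in the per-file handle IDR */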
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    int32_t priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(mgr, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

#define AMDGPU_RAS_COUNTE_DELAY_MS 3000

static int amdgpu_ctx_query2(struct amdgpu_device *adev,
			     struct amdgpu_fpriv *fpriv, uint32_t id,
			     union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	if (amdgpu_in_reset(adev))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;

	if (adev->ras_enabled && con) {
		/* Return the cached values in O(1),
		 * and schedule delayed work to cache
		 * new values.
		 */
		int ce_count, ue_count;

		ce_count = atomic_read(&con->ras_ce_count);
		ue_count = atomic_read(&con->ras_ue_count);

		if (ce_count != ctx->ras_counter_ce) {
			ctx->ras_counter_ce = ce_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		}

		if (ue_count != ctx->ras_counter_ue) {
			ctx->ras_counter_ue = ue_count;
			out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		}

		schedule_delayed_work(&con->ras_counte_delay_work,
				      msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
				    struct amdgpu_fpriv *fpriv, uint32_t id,
				    bool set, u32 *stable_pstate)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	int r;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	if (set)
		r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
	else
		r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);

	mutex_unlock(&mgr->lock);
	return r;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id, stable_pstate;
	int32_t priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	priority = args->in.priority;

	/* For backwards compatibility, we need to accept ioctls with garbage
	 * in the priority field. Garbage values in the priority field result
	 * in the priority being set to NORMAL.
	 */
	if (!amdgpu_ctx_priority_is_valid(priority))
		priority = AMDGPU_CTX_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
		if (args->in.flags)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
		if (!r)
			args->out.pstate.flags = stable_pstate;
		break;
	case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
		if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
			return -EINVAL;
		stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
		if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
			return -EINVAL;
		r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}
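
/* Store the fence in the ring-buffer slot for its sequence number and
 * account the hardware time of the fence it replaces.
 */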
uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			      struct drm_sched_entity *entity,
			      struct dma_fence *fence)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	WARN_ON(other && !dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		     &ctx->mgr->time_spend[centity->hw_ip]);

	dma_fence_put(other);
	return seq;
}
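
/* Look up a fence by sequence number; ~0ull means the most recent one.
 * Returns NULL when the requested fence has already left the ring buffer.
 */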
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					   struct amdgpu_ctx_entity *aentity,
					   int hw_ip,
					   int32_t priority)
{
	struct amdgpu_device *adev = ctx->mgr->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity,
				      amdgpu_ctx_to_drm_sched_prio(priority));

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  int32_t priority)
{
	int32_t ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
			 struct amdgpu_device *adev)
{
	unsigned int i;

	mgr->adev = adev;
	mutex_init(&mgr->lock);
	idr_init_base(&mgr->ctx_handles, 1);

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
}
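
/* Flush all entities of all contexts owned by this manager, returning the
 * remaining timeout.
 */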
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
		kref_put(&ctx->refcount, amdgpu_ctx_fini);
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	amdgpu_ctx_mgr_entity_fini(mgr);
	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}
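
/* Accumulate the total hardware time per IP type: the time already folded
 * into mgr->time_spend plus the time of fences still held by live entities.
 */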
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
			  ktime_t usage[AMDGPU_HW_IP_NUM])
{
	struct amdgpu_ctx *ctx;
	unsigned int hw_ip, i;
	uint32_t id;

	/*
	 * This is a little bit racy because it can be that a ctx or a fence is
	 * destroyed just at the moment we try to account them. But that is ok
	 * since exactly that case is explicitly allowed by the interface.
	 */
	mutex_lock(&mgr->lock);
	for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		usage[hw_ip] = ns_to_ktime(ns);
	}

	idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
			for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
				struct amdgpu_ctx_entity *centity;
				ktime_t spend;

				centity = ctx->entities[hw_ip][i];
				if (!centity)
					continue;
				spend = amdgpu_ctx_entity_time(ctx, centity);
				usage[hw_ip] = ktime_add(usage[hw_ip], spend);
			}
		}
	}
	mutex_unlock(&mgr->lock);
}