GitHub Repository: torvalds/linux
Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_amdkfd_arcturus.h"
#include "amdgpu_reset.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
#include "sdma1/sdma1_4_2_2_sh_mask.h"
#include "sdma2/sdma2_4_2_2_offset.h"
#include "sdma2/sdma2_4_2_2_sh_mask.h"
#include "sdma3/sdma3_4_2_2_offset.h"
#include "sdma3/sdma3_4_2_2_sh_mask.h"
#include "sdma4/sdma4_4_2_2_offset.h"
#include "sdma4/sdma4_4_2_2_sh_mask.h"
#include "sdma5/sdma5_4_2_2_offset.h"
#include "sdma5/sdma5_4_2_2_sh_mask.h"
#include "sdma6/sdma6_4_2_2_offset.h"
#include "sdma6/sdma6_4_2_2_sh_mask.h"
#include "sdma7/sdma7_4_2_2_offset.h"
#include "sdma7/sdma7_4_2_2_sh_mask.h"
#include "v9_structs.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_amdkfd_gfx_v9.h"
#include "gfxhub_v1_0.h"
#include "mmhub_v9_4.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
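/*
 * DUMP_REG records one register (dword offset and current value) into the
 * caller-provided (*dump)[][2] array. It relies on the local variables
 * 'dump' and 'i' of the enclosing function and stops with a one-time
 * warning if more than HQD_N_REGS entries would be written.
 */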
#define HQD_N_REGS 56
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct v9_sdma_mqd *)mqd;
}
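/*
 * Compute the dword offset of the per-queue (RLC) SDMA registers for the
 * given engine/queue pair. Arcturus has eight SDMA engines, each with its
 * own register aperture, so the base is selected per engine and the queue
 * index is scaled by the RLC register stride.
 */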
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
				unsigned int engine_id,
				unsigned int queue_id)
{
	uint32_t sdma_engine_reg_base = 0;
	uint32_t sdma_rlc_reg_offset;

	switch (engine_id) {
	default:
		dev_warn(adev->dev,
			 "Invalid sdma engine id (%d), using engine id 0\n",
			 engine_id);
		fallthrough;
	case 0:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
		break;
	case 1:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
				mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
		break;
	case 2:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
		break;
	case 3:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
				mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
		break;
	case 4:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
				mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
		break;
	case 5:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
				mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
		break;
	case 6:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
				mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
		break;
	case 7:
		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
				mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
		break;
	}

	sdma_rlc_reg_offset = sdma_engine_reg_base
		+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);

	pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
		 queue_id, sdma_rlc_reg_offset);

	return sdma_rlc_reg_offset;
}
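/*
 * Load an SDMA queue from its MQD: disable the ring buffer, wait for the
 * queue context to go idle, program the doorbell and ring-buffer registers,
 * restore the write pointer from user space (falling back to the saved read
 * pointer), and finally re-enable the ring buffer.
 */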
int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	unsigned long end_jiffies;
	uint32_t data;
	uint64_t data64;
	uint64_t __user *wptr64 = (uint64_t __user *)wptr;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
	       m->sdmax_rlcx_doorbell_offset);

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
				m->sdmax_rlcx_rb_rptr);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
				m->sdmax_rlcx_rb_rptr_hi);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
	if (read_user_wptr(mm, wptr64, data64)) {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       lower_32_bits(data64));
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       upper_32_bits(data64));
	} else {
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);
		WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
		       m->sdmax_rlcx_rb_rptr_hi);
	}
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}
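/*
 * Snapshot the SDMA RLC register ranges for one queue into a freshly
 * allocated (*dump)[][2] array of offset/value pairs. The caller owns the
 * returned buffer and *n_regs reports how many entries were filled.
 */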
int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
			engine_id, queue_id);
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)

	*dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
		DUMP_REG(sdma_rlc_reg_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
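/*
 * A queue is considered occupied if its ring buffer is still enabled in
 * SDMA*_RLC*_RB_CNTL.
 */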
bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
				void *mqd)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t sdma_rlc_rb_cntl;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);

	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
		return true;

	return false;
}
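/*
 * Tear down an SDMA queue: disable the ring buffer, wait up to 'utimeout'
 * milliseconds for the queue context to go idle, clear the doorbell, and
 * save the current read pointer back into the MQD so the queue state can be
 * restored later.
 */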
int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
				unsigned int utimeout)
{
	struct v9_sdma_mqd *m;
	uint32_t sdma_rlc_reg_offset;
	uint32_t temp;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
					    m->sdma_queue_id);

	temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);

	while (true) {
		temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("SDMA RLC not idle in %s\n", __func__);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
	m->sdmax_rlcx_rb_rptr_hi =
		RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);

	return 0;
}
/*
 * Helper used to suspend/resume the gfx pipe so that barrier behaviour can
 * be changed for image post-processing work.
 */
static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
{
	int i, r = 0;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (!amdgpu_ring_sched_ready(ring))
			continue;

		/* stop scheduler and drain ring. */
		if (suspend) {
			drm_sched_stop(&ring->sched, NULL);
			r = amdgpu_fence_wait_empty(ring);
			if (r)
				goto out;
		} else {
			drm_sched_start(&ring->sched, 0);
		}
	}

out:
	/* return on resume or failure to drain rings. */
	if (!suspend || r)
		return r;

	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}
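/*
 * Toggle automatic waitcnt insertion at barriers via SQ_CONFIG. The KFD
 * queues and the compute schedulers are suspended and drained first so the
 * register is not changed while work is in flight; the MMIO update is
 * skipped if a GPU reset is in progress (the reset_domain semaphore cannot
 * be taken), although the cached flag is still updated.
 */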
static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
{
	uint32_t data;

	WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);

	if (!down_read_trylock(&adev->reset_domain->sem))
		return;

	amdgpu_amdkfd_suspend(adev, true);

	if (suspend_resume_compute_scheduler(adev, true))
		goto out;

	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
	data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
			     !enable_waitcnt);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);

out:
	suspend_resume_compute_scheduler(adev, false);

	amdgpu_amdkfd_resume(adev, true);

	up_read(&adev->reset_domain->sem);
}
/*
 * restore_dbg_registers is ignored here but is a general interface requirement
 * for devices that support GFXOFF and where the RLC save/restore list
 * does not support hw registers for debugging i.e. the driver has to manually
 * initialize the debug mode registers after it has disabled GFX off during the
 * debug session.
 */
static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
				bool restore_dbg_registers,
				uint32_t vmid)
{
	mutex_lock(&adev->grbm_idx_mutex);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	set_barrier_auto_waitcnt(adev, true);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
/*
 * keep_trap_enabled is ignored here but is a general interface requirement
 * for devices that support multi-process debugging where the performance
 * overhead from trap temporary setup needs to be bypassed when the debug
 * session has ended.
 */
static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
				bool keep_trap_enabled,
				uint32_t vmid)
{
	mutex_lock(&adev->grbm_idx_mutex);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);

	set_barrier_auto_waitcnt(adev, false);

	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);

	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}
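/*
 * KFD->KGD interface for Arcturus: SDMA queue management and debug trap
 * enable/disable use the Arcturus-specific helpers above; everything else is
 * shared with the common GFX v9 implementation.
 */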
const struct kfd2kgd_calls arcturus_kfd2kgd = {
	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
	.init_interrupts = kgd_gfx_v9_init_interrupts,
	.hqd_load = kgd_gfx_v9_hqd_load,
	.hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
	.hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
	.hqd_dump = kgd_gfx_v9_hqd_dump,
	.hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_gfx_v9_hqd_destroy,
	.hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
	.get_atc_vmid_pasid_mapping_info =
				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
	.set_vm_context_page_table_base =
				kgd_gfx_v9_set_vm_context_page_table_base,
	.enable_debug_trap = kgd_arcturus_enable_debug_trap,
	.disable_debug_trap = kgd_arcturus_disable_debug_trap,
	.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
	.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
	.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
	.set_address_watch = kgd_gfx_v9_set_address_watch,
	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
	.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
	.hqd_reset = kgd_gfx_v9_hqd_reset,
	.hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};