// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured but because CP is secured, the driver still needs to
 * parse the CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU-disabled
 * mode).
 *
 * DMA RR does NOT protect host because DMA is not secured.
 */

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

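/*
 * Illustration of how the token-pasting idle macros above expand (not extra
 * driver logic): IS_DMA_QM_IDLE(sts) becomes
 * (((sts) & DMA_QM_IDLE_MASK) == DMA_QM_IDLE_MASK), i.e. a queue manager is
 * reported idle only when every bit of its engine-specific idle mask is set
 * in the sampled GLBL_STS0 value.
 */
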
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}

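/*
 * Sketch of the intended usage (illustrative, not a call site in this file):
 * a packet id parsed from an untrusted command buffer must pass
 * validate_packet_id() before it may index goya_packet_sizes[], e.g.
 *
 *	if (!validate_packet_id(pkt_id))
 *		return -EINVAL;
 *	pkt_size = goya_packet_sizes[pkt_id];
 */
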
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static s64 goya_state_dump_specs_props[SP_MAX] = {0};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->dmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->dc_power_default = DC_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;
	prop->eq_interrupt_id = GOYA_EVENT_QUEUE_MSIX_IDX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GOYA_MME_PLL;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	return 0;
}

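/*
 * Queue index layout implied by the three loops above (derived from the
 * code, not an additional contract): indices [0, NUMBER_OF_EXT_HW_QUEUES)
 * are external queues with kernel-allocated CBs, the next
 * NUMBER_OF_CPU_HW_QUEUES indices are driver-only CPU queues, and the
 * following NUMBER_OF_INT_HW_QUEUES indices are internal queues whose CBs
 * are allocated by the user.
 */
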
/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

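/*
 * Note on the rmmio computation above: inbound region 0 maps BAR 0 starting
 * at SRAM_BASE_ADDR (see goya_init_iatu()), so the configuration space that
 * sits at CFG_BASE in the device address map is reached at offset
 * (CFG_BASE - SRAM_BASE_ADDR) inside the BAR 0 mapping.
 */
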
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}

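/*
 * Sketch of the save/restore pattern the return value enables (assumed
 * usage; "bar_size" below is a stand-in for the DDR BAR size, not a symbol
 * defined in this file):
 *
 *	u64 old = goya_set_ddr_bar_base(hdev, addr & ~(bar_size - 1));
 *	...access DRAM through the DDR BAR...
 *	goya_set_ddr_bar_base(hdev, old);
 */
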
/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status, val;
	int rc;

	rc = goya_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);

	if (pci_bar_size != MSIX_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

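/*
 * Worked example of the masking above (field widths inferred from the mask
 * value 0x7FF, i.e. bits [10:0]): the WREG32_AND clears the MMU-bypass and
 * ASID fields while preserving bits [31:11], and the WREG32_OR installs the
 * new ASID. For asid = 5, a register holding 0xABCD0403 ends up as
 * 0xABCD0005.
 */
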
static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	/* read back to flush the posted write */
	RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		struct goya_device *goya = hdev->asic_specific;

		if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
			return;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
				pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	} else {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}

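/*
 * Worked example of the PLL arithmetic above with illustrative register
 * values (not taken from real hardware): for a 50 MHz reference clock with
 * nf = 99, nr = 1, od = 1, pll_clk = 50 * (99 + 1) / ((1 + 1) * (1 + 1)) =
 * 1250 MHz, and with div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 4 the
 * resulting frequency is 1250 / (4 + 1) = 250 MHz.
 */
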
/*
 * goya_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, therefore it is assumed that the calling function has protected
 * itself against the case of calling this function from multiple threads with
 * different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	struct goya_device *goya = hdev->asic_specific;

	if ((goya->pm_mng_profile == PM_MANUAL) ||
			(goya->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	goya_set_pll_profile(hdev, freq);

	goya->curr_pll_profile = freq;

	return 1;
}

static void goya_set_freq_to_low_job(struct work_struct *work)
{
	struct goya_work_freq *goya_work = container_of(work,
						struct goya_work_freq,
						work_freq.work);
	struct hl_device *hdev = goya_work->hdev;

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->is_compute_ctx_active)
		goya_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&goya_work->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* force setting to low frequency */
	goya->curr_pll_profile = PLL_LOW;

	goya->pm_mng_profile = PM_AUTO;

	goya_set_pll_profile(hdev, PLL_LOW);

	schedule_delayed_work(&goya->goya_work->work_freq,
		usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	return 0;
}

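/*
 * Example of the log2 write above with a hypothetical size (the real value
 * comes from prop->dram_size): for 4 GB of DRAM, ilog2(4ULL << 30) == 32,
 * so 32 would be written to mmMMU_LOG2_DDR_SIZE.
 */
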
/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	cancel_delayed_work_sync(&goya->goya_work->work_freq);

	hl_hwmon_release_resources(hdev);
}

static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = DDR_BAR_ID;
	region->used = 1;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
							&hdev->cpu_accessible_dma_address,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->asic_prop.supports_compute_reset = true;
	hdev->asic_prop.allow_inference_soft_reset = true;
	hdev->supports_wait_for_multi_cs = false;
	hdev->supports_ctx_switch = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
	if (!goya->goya_work) {
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	goya->goya_work->hdev = hdev;
	INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya->goya_work);
	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

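/*
 * Note on the reg_off idiom above: (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI) is
 * the address stride between two consecutive DMA QMAN register blocks, so
 * each mmDMA_QM_0_* write with "+ reg_off" lands on the corresponding
 * register of QMAN <dma_id>. With a hypothetical stride of 0x20000, dma_id 3
 * would target mmDMA_QM_0_PQ_PI + 0x60000.
 */
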
static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

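/*
 * Worked example of the sync object selection above: sync objects are 4
 * bytes apart, so channel 1 signals completion on SOB_OBJ_1000, channel 2 on
 * SOB_OBJ_1000 + 4 (object 1001) and so on, while channel 0 gets the
 * dedicated SOB_OBJ_1007 slot.
 */
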
/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
		lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
		upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

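/*
 * Handshake summary for the function above (a restatement of the code, not
 * an extra protocol step): the driver publishes the PQ/CQ/EQ base addresses
 * and lengths, marks PQ_INIT_STATUS as READY_FOR_CP, rings the embedded CPU
 * through the GIC doorbell and then polls until the CPU flips the status to
 * READY_FOR_HOST or GOYA_CPU_TIMEOUT_USEC expires.
 */
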
static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}

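/*
 * Arithmetic note on the SLM clear loop above: slm_index << 2 converts a
 * word index to a byte offset, so the loop zeroes 256 consecutive 32-bit
 * words (1 KB) starting at tpc_slm_offset. The trailing RREG32 appears
 * intended to flush the posted writes before returning.
 */
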
static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	/* Workaround for H2 #2443 */

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all arithmetic interrupts from TPC */
	tpc_intr_mask = 0x7FFF;

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		/*
		 * Workaround for Bug H2 #2441 :
		 * "ST.NOP set trace event illegal opcode"
		 */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1804
WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1805
1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1806
1807
WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1808
WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1809
1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1810
1811
/*
1812
* Workaround for H2 #HW-23 bug
1813
* Set DMA max outstanding read requests to 240 on DMA CH 1.
1814
* This limitation is still large enough to not affect Gen4 bandwidth.
1815
* We need to only limit that DMA channel because the user can only read
1816
* from Host using DMA CH 1
1817
*/
1818
WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1819
1820
WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1821
1822
goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1823
}
1824
1825
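/*
 * goya_init_mme_qman - Initialize the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Point the PQ to its buffer on the SRAM, set up the CP message base
 * registers (sync manager monitor payload and sync objects), route QMAN
 * errors to the GIC and finally enable the QMAN. The raw 0x10C0-0x10CC
 * values appear to be CP-internal register offsets used by LDMA commands;
 * this is an inference from the register names, not documented here.
 */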
static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address +
			MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}

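/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 * Same flow as the QMAN init, minus the PQ registers, presumably because
 * the CMDQ is fed by the QMAN rather than by a PQ on the SRAM.
 */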
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}

void goya_init_mme_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;

	if (goya->hw_cap_initialized & HW_CAP_MME)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);

	goya_init_mme_qman(hdev);
	goya_init_mme_cmdq(hdev);

	goya->hw_cap_initialized |= HW_CAP_MME;
}

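/*
 * goya_init_tpc_qman - Initialize a single TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: offset of the QMAN PQ inside the SRAM
 * @tpc_id: index of the TPC this QMAN belongs to
 *
 * Mirrors goya_init_mme_qman(), using a per-TPC register offset derived
 * from the distance between the TPC1 and TPC0 register blocks.
 */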
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}

static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}

void goya_init_tpc_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;
	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_TPC)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
				so_base_lo);
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
				so_base_hi);
	}

	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		goya_init_tpc_cmdq(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC;
}

/*
 * goya_disable_internal_queues - Disable internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_internal_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		goto disable_tpc;

	WREG32(mmMME_QM_GLBL_CFG0, 0);
	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);

disable_tpc:
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
}

/*
 * goya_stop_internal_queues - Stop internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_internal_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc, retval = 0;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		goto stop_tpc;

	/*
	 * Each queue (QMAN) is a separate H/W logic. That means that each
	 * QMAN can be stopped independently, and failure to stop one does
	 * NOT prevent us from trying to stop the other QMANs
	 */

	rc = goya_stop_queue(hdev,
			mmMME_QM_GLBL_CFG1,
			mmMME_QM_CP_STS,
			mmMME_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmMME_CMDQ_GLBL_CFG1,
			mmMME_CMDQ_CP_STS,
			mmMME_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
		retval = -EIO;
	}

stop_tpc:
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return retval;

	rc = goya_stop_queue(hdev,
			mmTPC0_QM_GLBL_CFG1,
			mmTPC0_QM_CP_STS,
			mmTPC0_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC0_CMDQ_GLBL_CFG1,
			mmTPC0_CMDQ_CP_STS,
			mmTPC0_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_QM_GLBL_CFG1,
			mmTPC1_QM_CP_STS,
			mmTPC1_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_CMDQ_GLBL_CFG1,
			mmTPC1_CMDQ_CP_STS,
			mmTPC1_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_QM_GLBL_CFG1,
			mmTPC2_QM_CP_STS,
			mmTPC2_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_CMDQ_GLBL_CFG1,
			mmTPC2_CMDQ_CP_STS,
			mmTPC2_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_QM_GLBL_CFG1,
			mmTPC3_QM_CP_STS,
			mmTPC3_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_CMDQ_GLBL_CFG1,
			mmTPC3_CMDQ_CP_STS,
			mmTPC3_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_QM_GLBL_CFG1,
			mmTPC4_QM_CP_STS,
			mmTPC4_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_CMDQ_GLBL_CFG1,
			mmTPC4_CMDQ_CP_STS,
			mmTPC4_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_QM_GLBL_CFG1,
			mmTPC5_QM_CP_STS,
			mmTPC5_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_CMDQ_GLBL_CFG1,
			mmTPC5_CMDQ_CP_STS,
			mmTPC5_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_QM_GLBL_CFG1,
			mmTPC6_QM_CP_STS,
			mmTPC6_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_CMDQ_GLBL_CFG1,
			mmTPC6_CMDQ_CP_STS,
			mmTPC6_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_QM_GLBL_CFG1,
			mmTPC7_QM_CP_STS,
			mmTPC7_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_CMDQ_GLBL_CFG1,
			mmTPC7_CMDQ_CP_STS,
			mmTPC7_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
		retval = -EIO;
	}

	return retval;
}

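/*
 * goya_dma_stall - Stall all DMA channels
 *
 * @hdev: pointer to hl_device structure
 *
 * Sets the DMA_STOP bit in each channel's CFG1 register. Unlike stopping
 * the queues, this halts the DMA engines themselves.
 */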
static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}

static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}

static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME_STALL, 0xFFFFFFFF);
}

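/*
 * goya_enable_msix - Enable MSI-X and request IRQs
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates all GOYA_MSIX_ENTRIES vectors, then requests one IRQ per
 * completion queue plus a dedicated IRQ for the event queue. On any
 * failure, every IRQ requested so far is released.
 */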
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void goya_sync_irqs(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	/* Wait for all pending IRQs to be finished */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		synchronize_irq(pci_irq_vector(hdev->pdev, i));

	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}

static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}

static void goya_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

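/*
 * goya_halt_engines - Halt all compute and DMA engines
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if this is a preparation for hard reset
 * @fw_reset: true if the reset is performed by the F/W (unused here)
 *
 * Stop the queues, stall the engines and then disable the queues, with a
 * settling delay between stages. For hard reset, also tear down MSI-X and
 * the device CPU mappings; otherwise just drain pending IRQs.
 */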
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}

/*
 * goya_load_firmware_to_device() - Load LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX fw code from firmware file to HBM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
}

/*
 * goya_load_boot_fit_to_device() - Load boot fit to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy boot fit file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
}

static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Here we update the initial values of a few specific dynamic
	 * registers. Before the first descriptor is read from the FW, these
	 * values have to be hard-coded; in later stages of the protocol they
	 * are updated automatically by reading the FW descriptor, so the
	 * data there is always up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
		cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
		cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void goya_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
}

static void goya_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
}

static void goya_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DDR_BAR_ID;

	if (prop->dynamic_fw_load)
		goya_init_dynamic_firmware_loader(hdev);
	else
		goya_init_static_firmware_loader(hdev);
}

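/*
 * goya_init_cpu - Initialize the device CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Skipped entirely when the driver was loaded without CPU F/W components
 * or when the CPU was already initialized. Maps the DDR BAR to the DRAM
 * base so the F/W can be copied in, then runs the common F/W init flow.
 *
 * Returns 0 on success
 */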
static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to the device, we need to set the DDR
	 * BAR to the base address of the DRAM
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	rc = hl_fw_init_cpu(hdev);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

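/*
 * goya_mmu_update_asid_hop0_addr - Set the hop0 physical address of an ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID to configure
 * @phys_addr: physical address of the hop0 page table
 *
 * Writes the address in two parts (bits 43:12 and 49:44), kicks the H/W
 * via the busy register and polls until the busy bit clears.
 */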
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

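/*
 * goya_mmu_init - Initialize the device MMU
 *
 * @hdev: pointer to hl_device structure
 *
 * Programs the hop0 address for every possible ASID, sets up the MMU
 * cache management page, disables the STLB follower feature (performance
 * bug workaround), invalidates the MMU cache and finally enables the MMU.
 *
 * Returns 0 on success
 */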
int goya_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	hdev->dram_default_page_mapping = true;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->dmmu.hop_table_size);

		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	goya->hw_cap_initialized |= HW_CAP_MMU;

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
		lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	/* Remove follower feature due to performance bug */
	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
		(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);

	WREG32(mmMMU_MMU_ENABLE, 1);
	WREG32(mmMMU_SPI_MASK, 0xF);

	return 0;

err:
	return rc;
}

/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}

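/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true to reset the whole chip, false for DMA/MME/TPC reset
 * @fw_reset: true if the reset is performed by the F/W (unused here)
 *
 * Returns 0 on success
 *
 */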
static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/* We don't know the state of the CPU, so make sure it is
		 * stopped by any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		msleep(cpu_timeout_ms);

		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_dbg(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_dbg(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the
	 * PSOC itself is in reset. In either reset we need to wait until the
	 * reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
		return -ETIMEDOUT;
	}

	if (!hard_reset && goya) {
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return 0;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
		0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	if (goya) {
		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
				HW_CAP_DMA | HW_CAP_MME |
				HW_CAP_MMU | HW_CAP_TPC_MBIST |
				HW_CAP_GOLDEN | HW_CAP_TPC);

		memset(goya->events_stat, 0, sizeof(goya->events_stat));
	}
	return 0;
}

int goya_suspend(struct hl_device *hdev)
{
	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}

int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}

static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

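/*
 * goya_ring_doorbell - Write a new PI to a H/W queue doorbell
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: queue to ring
 * @pi: new producer index
 *
 * For the CPU queue, also raise a GIC event so the device CPU knows the
 * PI was updated; the memory barrier before it makes sure the device CPU
 * reads the latest data from the host.
 */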
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
	}
}

void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMANs are on the SRAM so need to copy to IO space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}

static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

int goya_scrub_device_mem(struct hl_device *hdev)
{
	return 0;
}

void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	base += offset;
	*dma_handle += offset;

	return base;
}

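/*
 * goya_send_job_on_qman0 - Send a driver job on QMAN0 (DMA 0)
 *
 * @hdev: pointer to hl_device structure
 * @job: job to send
 *
 * Appends an MSG_PROT fence packet at the end of the patched CB, sends
 * the CB on DMA 0 without a completion entry and polls the fence memory
 * until the H/W writes the expected value. QMAN0 security is raised for
 * the duration of the job because this path is reserved for the driver.
 */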
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);

	goya_qman0_set_security(hdev, false);

	return rc;
}

int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
				u32 timeout, u64 *result)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
					timeout, result);
}

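/*
 * goya_test_queue - Sanity-check a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: queue to test
 *
 * Sends an MSG_PROT packet that instructs the H/W to write a known fence
 * value to host memory, then polls that memory; a timeout means the queue
 * failed to process the packet.
 */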
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
				1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}

int goya_test_cpu_queue(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}

int goya_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		rc = goya_test_queue(hdev, i);
		if (rc)
			ret_val = -EINVAL;
	}

	return ret_val;
}

static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

	if (size > GOYA_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle)
{
	void *vaddr;

	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
			VA_CPU_ACCESSIBLE_MEM_ADDR;

	return vaddr;
}

void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}

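/*
 * goya_get_dma_desc_list_size - Compute the patched size of a DMA S/G list
 *
 * @hdev: pointer to hl_device structure
 * @sgt: DMA-mapped scatter-gather table
 *
 * Counts how many LIN_DMA packets are needed to cover the table, merging
 * physically contiguous entries as long as the combined length stays
 * within DMA_MAX_TRANSFER_SIZE, and returns the total size in bytes.
 */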
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}

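/*
 * goya_pin_memory_before_cs - Pin host memory referenced by a DMA packet
 *
 * @hdev: pointer to hl_device structure
 * @parser: CS parser state
 * @user_dma_pkt: the user's LIN_DMA packet
 * @addr: host address to pin
 * @dir: DMA direction
 *
 * Pins and DMA-maps the host range unless it is already pinned for this
 * job, and grows the patched CB size by the descriptor list this range
 * will need.
 */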
static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}

static int goya_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	enum hl_goya_dma_direction user_dir;
	bool sram_addr = true;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 ctl;
	int rc = 0;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	switch (user_dir) {
	case HL_DMA_HOST_TO_DRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
		dir = DMA_TO_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case HL_DMA_DRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;

	case HL_DMA_HOST_TO_SRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case HL_DMA_SRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;
	default:
		dev_err(hdev->dev, "DMA direction %d is unsupported/undefined\n", user_dir);
		return -EFAULT;
	}

	if (sram_addr) {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.sram_user_base_address,
				hdev->asic_prop.sram_end_address)) {

			dev_err(hdev->dev,
				"SRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				user_dma_pkt->tsize);
			return -EFAULT;
		}
	} else {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.dram_user_base_address,
				hdev->asic_prop.dram_end_address)) {

			dev_err(hdev->dev,
				"DRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				user_dma_pkt->tsize);
			return -EFAULT;
		}
	}

	if (skip_host_mem_pin)
		parser->patched_cb_size += sizeof(*user_dma_pkt);
	else {
		if ((dir == DMA_TO_DEVICE) &&
				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
			dev_err(hdev->dev,
				"Can't DMA from host on queue other than 1\n");
			return -EFAULT;
		}

		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
	}

	return rc;
}

static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 sram_memory_addr, dram_memory_addr;
	enum hl_goya_dma_direction user_dir;
	u32 ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	if (user_dir == HL_DMA_DRAM_TO_SRAM) {
		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	}

	if (!hl_mem_area_inside_range(sram_memory_addr,
			le32_to_cpu(user_dma_pkt->tsize),
			hdev->asic_prop.sram_user_base_address,
			hdev->asic_prop.sram_end_address)) {
		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
			sram_memory_addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	if (!hl_mem_area_inside_range(dram_memory_addr,
			le32_to_cpu(user_dma_pkt->tsize),
			hdev->asic_prop.dram_user_base_address,
			hdev->asic_prop.dram_end_address)) {
		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
			dram_memory_addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum hl_goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Special handling for DMA with size 0. The H/W has a bug where
	 * this can cause the QMAN DMA to get stuck, so block it here.
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}

static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * WA for HW-23.
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other than 1\n");
		return -EFAULT;
	}

	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

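/*
 * A user WREG32 may only target the DMA channel WR_COMP address register.
 * When the MMU is disabled the written value must also point into the sync
 * manager SOB range, so a user can't direct the write-completion message
 * at a protected address.
 */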
static int goya_validate_wreg32(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_wreg32 *wreg_pkt)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 sob_start_addr, sob_end_addr;
	u16 reg_offset;

	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;

	dev_dbg(hdev->dev, "WREG32 packet details:\n");
	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
	dev_dbg(hdev->dev, "value == 0x%x\n",
		le32_to_cpu(wreg_pkt->value));

	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
			reg_offset);
		return -EPERM;
	}

	/*
	 * With MMU, DMA channels are not secured, so it doesn't matter where
	 * the WR COMP will be written to because it will go out with
	 * non-secured property
	 */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);

	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {

		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
			le32_to_cpu(wreg_pkt->value));
		return -EPERM;
	}

	return 0;
}

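/*
 * First parsing pass over the user CB. Walks the packets one by one,
 * rejects packet types a user may not submit (WREG_BULK, MSG_PROT, CP_DMA,
 * STOP), validates the rest, and accumulates patched_cb_size - the size
 * the patched CB will need, including room for the two MSG_PROT packets
 * that are appended at the end (completion write + MSI-X interrupt).
 */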
static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* user_cb_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}

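/*
 * Expand one user LIN_DMA packet that touches host memory into a series of
 * LIN_DMA packets in the patched CB, one per DMA-mapped scatter-gather
 * chunk. Physically contiguous SG entries are merged as long as the merged
 * length stays within DMA_MAX_TRANSFER_SIZE, e.g. two adjacent 4KB chunks
 * become a single 8KB descriptor. Only the last generated packet keeps the
 * user's rdcomp/wrcomp bits, so completion fires once per user packet.
 */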
static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum hl_goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	if ((user_dir == HL_DMA_HOST_TO_DRAM) || (user_dir == HL_DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
		return -EFAULT;
	}

	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}

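/*
 * Second parsing pass, used only when the MMU is disabled. Re-walks the
 * user CB and builds the patched CB: LIN_DMA packets are rewritten through
 * goya_patch_dma_packet(), benign packets are copied verbatim, and the
 * forbidden packet types are rejected again as a safety net.
 */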
static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* user_cb_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}

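/*
 * MMU parsing path. The user CB is copied as-is into a newly allocated
 * patched CB (only the two MSG_PROT packets are appended), and validation
 * runs on the copy so the user can't modify packets after they were
 * checked. The handle is destroyed before returning; the reference taken
 * by hl_cb_get() keeps the CB alive until the job completes.
 */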
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}

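/*
 * Non-MMU parsing path. A first pass (goya_validate_cb) computes how large
 * the patched CB must be, then goya_patch_cb() fills the new CB, expanding
 * each host-touching LIN_DMA packet into per-SG-chunk packets. On failure,
 * the job's list of pinned userptrs is released here.
 */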
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}

static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* For internal queue jobs, just check if CB address is valid */
	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->sram_user_base_address,
			asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->dram_user_base_address,
			asic_prop->dram_end_address))
		return 0;

	dev_err(hdev->dev,
		"Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}

int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct goya_device *goya = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return goya_parse_cb_no_ext_queue(hdev, parser);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return goya_parse_cb_mmu(hdev, parser);
	else
		return goya_parse_cb_no_mmu(hdev, parser);
}

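/*
 * Append the two reserved MSG_PROT packets at the tail of a patched CB:
 * the first writes cq_val to the completion queue address (with event
 * barrier set so it lands only after all previous packets are done), and
 * the second rings the MSI-X doorbell to raise the completion interrupt.
 */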
void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
				u32 msix_vec, bool eb)
{
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}

void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Flush all WREG to prevent race */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}

static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	dev_err(hdev->dev, "Reading via DMA is not implemented yet\n");
	return -EPERM;
}

static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		return "QUEUE_OUT_OF_SYNC";
	default:
		return "N/A";
	}
}

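/*
 * Format a printable event description. For per-engine events the engine
 * index is recovered from the event ID spacing: TPC ECC/DEC IDs are 3
 * apart, TPC KRN_ERR and BMON_SPMU IDs are 10 apart, and the remaining
 * ranges (SRAM, PLL, CMDQ, QM, DMA channels) are contiguous, so the index
 * is just the offset from the first ID in the range.
 */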
static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	u8 index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}

static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}

static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}

static void goya_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
				bool razwi)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		goya_print_razwi_info(hdev);
		goya_print_mmu_error_info(hdev);
	}
}

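/*
 * Ask the device CPU to unmask a list of IRQs in one CPU-CP packet. The
 * payload must be 8-byte aligned for CPU-CP to copy it, hence the
 * round-up of total_pkt_size with (x + 0x7) & ~0x7, and each IRQ number is
 * converted to little-endian before it is handed to the hardware.
 */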
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
		size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	u64 result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes so that CPU-CP can copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is cast to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}

static int goya_compute_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}

static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);

	return rc;
}

static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}

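/*
 * Event queue dispatcher. Every event bumps the per-type statistics, then
 * fatal errors (ECC, GIC, AXI and friends) escalate to a hard reset when
 * hard_reset_on_fw_events is set, RAZWI-capable errors print the captured
 * illegal-access info before the IRQ is unmasked, and clock throttling
 * events only update the throttling bookkeeping.
 */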
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
		return;
	}

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, (HL_DRV_RESET_HARD |
						HL_DRV_RESET_FW_FATAL_ERR));
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, HL_DRV_RESET_HARD);
		break;

	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
		goya_print_irq_info(hdev, event_type, false);
		goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, HL_DRV_RESET_HARD);
		else
			hl_fw_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(goya->events_stat_aggregate);
		return goya->events_stat_aggregate;
	}

	*size = (u32) sizeof(goya->events_stat);
	return goya->events_stat;
}

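/*
 * Fill a device memory range with a 64-bit pattern by building a kernel CB
 * of memset LIN_DMA packets and running it on QMAN DMA 0. tsize is a u32,
 * so the range is split into at most 2GB chunks - e.g. a 5GB scrub emits
 * three packets of 2GB, 2GB and 1GB.
 */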
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
					u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? HL_DMA_HOST_TO_DRAM : HL_DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
			lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_clear_sm_regs(hdev);

	return 0;
}

static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}

static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}

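/*
 * Create the kernel-context MMU mappings the embedded CPU needs: the F/W
 * image region in DRAM is identity-mapped with 2MB pages, and the host
 * CPU-accessible DMA pool is mapped at VA_CPU_ACCESSIBLE_MEM_ADDR - with a
 * single 2MB page when the pool happens to be 2MB aligned, otherwise with
 * 4KB pages. On any failure everything mapped so far is rolled back.
 */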
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
			VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address,
			PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}

void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
				"Failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}

static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}

static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* no need for an L1-only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	return rc;
}

static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Goya
	 */
	return hl_mmu_invalidate_cache(hdev, is_hard, flags);
}

int goya_send_heartbeat(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

int goya_cpucp_info_get(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 dram_size;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		if ((!is_power_of_2(dram_size)) ||
				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
				dram_size);
			dram_size = DRAM_PHYS_DEFAULT_SIZE;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	if (!strlen(prop->cpucp_info.card_name))
		strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	return 0;
}

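/*
 * Check every DMA, TPC and MME engine for idleness by reading its QM/CMDQ
 * global status (and the engine's own status register), optionally
 * pretty-printing a per-engine table into 'e'. Each busy engine sets its
 * bit in the caller-supplied mask, and the function returns true only if
 * all engines are idle.
 */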
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
				struct engines_data *e)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (e)
		hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
					"--- ------- ------------ -------------\n");

	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
			"--- ------- ------------ -------------- ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
			"--- ------- ------------ -------------- -----------\n");

	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask && !is_eng_idle)
		set_bit(GOYA_ENGINE_ID_MME_0, mask);
	if (e) {
		hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		hl_engine_data_sprintf(e, "\n");
	}

	return is_idle;
}

static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}

static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
{

}

static int goya_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid != HL_KERNEL_ASID_ID)
		goya_mmu_prepare(ctx->hdev, ctx->asid);

	return 0;
}

static int goya_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}

u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}

static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	return 0;
}

static u32 goya_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	return 0;
}

static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}

static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{

}

u64 goya_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int goya_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}

static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}

static void goya_ctx_fini(struct hl_ctx *ctx)
{

}

static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void goya_enable_events_from_fw(struct hl_device *hdev)
{
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
		GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
}

static int goya_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GOYA_CPU_PLL: return CPU_PLL;
	case HL_GOYA_PCI_PLL: return PCI_PLL;
	case HL_GOYA_MME_PLL: return MME_PLL;
	case HL_GOYA_TPC_PLL: return TPC_PLL;
	case HL_GOYA_IC_PLL: return IC_PLL;
	case HL_GOYA_MC_PLL: return MC_PLL;
	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
	default: return -EINVAL;
	}
}

static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}


static int goya_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}


static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
	.monitor_valid = goya_monitor_valid,
	.print_single_monitor = goya_print_single_monitor,
	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
	.print_fences_single_engine = goya_print_fences_single_engine,
};

static void goya_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = goya_state_dump_specs_props;
	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}

static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *goya_get_stream_master_qid_arr(void)
{
	return NULL;
}

static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

static void goya_check_if_razwi_happened(struct hl_device *hdev)
{
}

static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	return -EOPNOTSUPP;
}

static int goya_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int goya_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.mmap = goya_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.scrub_device_mem = goya_scrub_device_mem,
	.scrub_device_dram = goya_scrub_device_dram,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = goya_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read_dma = goya_debugfs_read_dma,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = goya_send_heartbeat,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.compute_reset_late_init = goya_compute_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.get_monitor_dump = goya_get_monitor_dump,
	.send_cpu_message = goya_send_cpu_message,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.ctx_fini = goya_ctx_fini,
	.pre_schedule_cs = goya_pre_schedule_cs,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.reset_sob_group = goya_reset_sob_group,
	.get_device_time = goya_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = goya_collective_wait_init_cs,
	.collective_wait_create_jobs = goya_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
	.get_hw_block_id = goya_get_hw_block_id,
	.hw_block_mmap = goya_block_mmap,
	.enable_events_from_fw = goya_enable_events_from_fw,
	.ack_mmu_errors = goya_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = goya_init_firmware_preload_params,
	.init_firmware_loader = goya_init_firmware_loader,
	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
	.state_dump_init = goya_state_dump_init,
	.get_sob_addr = &goya_get_sob_addr,
	.set_pci_memory_regions = goya_set_pci_memory_regions,
	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
	.check_if_razwi_happened = goya_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.send_device_activity = goya_send_device_activity,
	.set_dram_properties = goya_set_dram_properties,
	.set_binning_masks = goya_set_binning_masks,
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}