Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/habanalabs/gaudi/gaudi.c
26439 views
1
// SPDX-License-Identifier: GPL-2.0
2
3
/*
4
* Copyright 2016-2022 HabanaLabs, Ltd.
5
* All Rights Reserved.
6
*/
7
8
#include "gaudiP.h"
9
#include "../include/hw_ip/mmu/mmu_general.h"
10
#include "../include/hw_ip/mmu/mmu_v1_1.h"
11
#include "../include/gaudi/gaudi_masks.h"
12
#include "../include/gaudi/gaudi_fw_if.h"
13
#include "../include/gaudi/gaudi_reg_map.h"
14
#include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16
#include <linux/module.h>
17
#include <linux/pci.h>
18
#include <linux/firmware.h>
19
#include <linux/hwmon.h>
20
#include <linux/iommu.h>
21
#include <linux/seq_file.h>
22
23
/*
24
* Gaudi security scheme:
25
*
26
* 1. Host is protected by:
27
* - Range registers
28
* - MMU
29
*
30
* 2. DDR is protected by:
31
* - Range registers (protect the first 512MB)
32
*
33
* 3. Configuration is protected by:
34
* - Range registers
35
* - Protection bits
36
*
37
* MMU is always enabled.
38
*
39
* QMAN DMA channels 0,1 (PCI DMAN):
40
* - DMA is not secured.
41
* - PQ and CQ are secured.
42
* - CP is secured: The driver needs to parse CB but WREG should be allowed
43
* because of TDMA (tensor DMA). Hence, WREG is always not
44
* secured.
45
*
46
* When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47
* channel 0 to be secured, execute the DMA and change it back to not secured.
48
* Currently, the driver doesn't use the DMA while there are compute jobs
49
* running.
50
*
51
* The current use cases for the driver to use the DMA are:
52
* - Clear SRAM on context switch (happens on context switch when device is
53
* idle)
54
* - MMU page tables area clear (happens on init)
55
*
56
* QMAN DMA 2-7, TPC, MME, NIC:
57
* PQ is secured and is located on the Host (HBM CON TPC3 bug)
58
* CQ, CP and the engine are not secured
59
*
60
*/
61
62
#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73
#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74
#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75
#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77
#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80
#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87
#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89
#define GAUDI_MAX_STRING_LEN 20
90
91
#define GAUDI_CB_POOL_CB_CNT 512
92
#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94
#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96
#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98
#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102
#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
103
104
#define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106
#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108
#define MONITOR_SOB_STRING_SIZE 256
109
110
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111
GAUDI_QUEUE_ID_DMA_0_0,
112
GAUDI_QUEUE_ID_DMA_0_1,
113
GAUDI_QUEUE_ID_DMA_0_2,
114
GAUDI_QUEUE_ID_DMA_0_3,
115
GAUDI_QUEUE_ID_DMA_1_0,
116
GAUDI_QUEUE_ID_DMA_1_1,
117
GAUDI_QUEUE_ID_DMA_1_2,
118
GAUDI_QUEUE_ID_DMA_1_3
119
};
120
121
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122
[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123
[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124
[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125
[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126
[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127
[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128
[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129
[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130
};
131
132
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133
[0] = GAUDI_QUEUE_ID_DMA_0_0,
134
[1] = GAUDI_QUEUE_ID_DMA_0_1,
135
[2] = GAUDI_QUEUE_ID_DMA_0_2,
136
[3] = GAUDI_QUEUE_ID_DMA_0_3,
137
[4] = GAUDI_QUEUE_ID_DMA_1_0,
138
[5] = GAUDI_QUEUE_ID_DMA_1_1,
139
[6] = GAUDI_QUEUE_ID_DMA_1_2,
140
[7] = GAUDI_QUEUE_ID_DMA_1_3,
141
};
142
143
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144
[PACKET_WREG_32] = sizeof(struct packet_wreg32),
145
[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
146
[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
147
[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
148
[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
149
[PACKET_REPEAT] = sizeof(struct packet_repeat),
150
[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
151
[PACKET_FENCE] = sizeof(struct packet_fence),
152
[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
153
[PACKET_NOP] = sizeof(struct packet_nop),
154
[PACKET_STOP] = sizeof(struct packet_stop),
155
[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
156
[PACKET_WAIT] = sizeof(struct packet_wait),
157
[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
158
};
159
160
static inline bool validate_packet_id(enum packet_id id)
161
{
162
switch (id) {
163
case PACKET_WREG_32:
164
case PACKET_WREG_BULK:
165
case PACKET_MSG_LONG:
166
case PACKET_MSG_SHORT:
167
case PACKET_CP_DMA:
168
case PACKET_REPEAT:
169
case PACKET_MSG_PROT:
170
case PACKET_FENCE:
171
case PACKET_LIN_DMA:
172
case PACKET_NOP:
173
case PACKET_STOP:
174
case PACKET_ARB_POINT:
175
case PACKET_WAIT:
176
case PACKET_LOAD_AND_EXE:
177
return true;
178
default:
179
return false;
180
}
181
}
182
183
static const char * const
184
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185
"tpc_address_exceed_slm",
186
"tpc_div_by_0",
187
"tpc_spu_mac_overflow",
188
"tpc_spu_addsub_overflow",
189
"tpc_spu_abs_overflow",
190
"tpc_spu_fp_dst_nan_inf",
191
"tpc_spu_fp_dst_denorm",
192
"tpc_vpu_mac_overflow",
193
"tpc_vpu_addsub_overflow",
194
"tpc_vpu_abs_overflow",
195
"tpc_vpu_fp_dst_nan_inf",
196
"tpc_vpu_fp_dst_denorm",
197
"tpc_assertions",
198
"tpc_illegal_instruction",
199
"tpc_pc_wrap_around",
200
"tpc_qm_sw_err",
201
"tpc_hbw_rresp_err",
202
"tpc_hbw_bresp_err",
203
"tpc_lbw_rresp_err",
204
"tpc_lbw_bresp_err"
205
};
206
207
static const char * const
208
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209
"PQ AXI HBW error",
210
"CQ AXI HBW error",
211
"CP AXI HBW error",
212
"CP error due to undefined OPCODE",
213
"CP encountered STOP OPCODE",
214
"CP AXI LBW error",
215
"CP WRREG32 or WRBULK returned error",
216
"N/A",
217
"FENCE 0 inc over max value and clipped",
218
"FENCE 1 inc over max value and clipped",
219
"FENCE 2 inc over max value and clipped",
220
"FENCE 3 inc over max value and clipped",
221
"FENCE 0 dec under min value and clipped",
222
"FENCE 1 dec under min value and clipped",
223
"FENCE 2 dec under min value and clipped",
224
"FENCE 3 dec under min value and clipped"
225
};
226
227
static const char * const
228
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229
"Choice push while full error",
230
"Choice Q watchdog error",
231
"MSG AXI LBW returned with error"
232
};
233
234
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243
QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348
};
349
350
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351
{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352
{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353
{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354
{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355
{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356
{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357
{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358
{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359
{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360
{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361
{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362
{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363
{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364
{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365
{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366
{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367
{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368
{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369
{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370
{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371
{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372
{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373
{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374
{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375
{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376
{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377
{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378
};
379
380
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381
{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382
{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383
{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384
{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385
{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386
{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387
{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388
{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389
{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390
{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391
{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392
};
393
394
static s64 gaudi_state_dump_specs_props[] = {
395
[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396
[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397
[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398
[SP_MON_OBJ_WR_ADDR_LOW] =
399
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400
[SP_MON_OBJ_WR_ADDR_HIGH] =
401
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402
[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403
[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404
[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405
[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406
[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407
[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408
[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409
[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410
[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411
[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412
[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413
[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414
[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415
[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416
[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417
[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418
[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419
[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420
[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421
[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422
[SP_FENCE0_CNT_OFFSET] =
423
mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424
[SP_FENCE0_RDATA_OFFSET] =
425
mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426
[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427
[SP_NUM_CORES] = 1,
428
};
429
430
static const int gaudi_queue_id_to_engine_id[] = {
431
[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432
[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433
[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434
[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435
[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436
[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437
[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438
[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439
[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440
[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441
[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442
[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443
[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444
[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445
[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446
[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447
[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448
[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449
[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450
[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451
[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452
[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453
[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454
[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455
[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456
[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457
[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458
[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459
[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460
};
461
462
/* The order here is opposite to the order of the indexing in the h/w.
463
* i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464
*/
465
static const char * const gaudi_sync_manager_names[] = {
466
"SYNC_MGR_E_N",
467
"SYNC_MGR_W_N",
468
"SYNC_MGR_E_S",
469
"SYNC_MGR_W_S",
470
NULL
471
};
472
473
struct ecc_info_extract_params {
474
u64 block_address;
475
u32 num_memories;
476
bool derr;
477
};
478
479
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480
u64 phys_addr);
481
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482
struct hl_cs_job *job);
483
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484
u32 size, u64 val);
485
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486
u32 num_regs, u32 val);
487
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488
u32 tpc_id);
489
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490
static int gaudi_cpucp_info_get(struct hl_device *hdev);
491
static void gaudi_disable_clock_gating(struct hl_device *hdev);
492
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494
u32 size, bool eb);
495
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496
struct hl_gen_wait_properties *prop);
497
static inline enum hl_collective_mode
498
get_collective_mode(struct hl_device *hdev, u32 queue_id)
499
{
500
if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501
return HL_COLLECTIVE_MASTER;
502
503
if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504
queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505
return HL_COLLECTIVE_SLAVE;
506
507
if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508
queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509
return HL_COLLECTIVE_SLAVE;
510
511
if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512
queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513
return HL_COLLECTIVE_SLAVE;
514
515
return HL_COLLECTIVE_NOT_SUPPORTED;
516
}
517
518
static inline void set_default_power_values(struct hl_device *hdev)
519
{
520
struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522
if (hdev->card_type == cpucp_card_type_pmc) {
523
prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525
if (prop->fw_security_enabled)
526
prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527
else
528
prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529
} else {
530
prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531
prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532
}
533
}
534
535
static int gaudi_set_fixed_properties(struct hl_device *hdev)
536
{
537
struct asic_fixed_properties *prop = &hdev->asic_prop;
538
u32 num_sync_stream_queues = 0;
539
int i;
540
541
prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542
prop->hw_queues_props = kcalloc(prop->max_queues,
543
sizeof(struct hw_queue_properties),
544
GFP_KERNEL);
545
546
if (!prop->hw_queues_props)
547
return -ENOMEM;
548
549
for (i = 0 ; i < prop->max_queues ; i++) {
550
if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552
prop->hw_queues_props[i].driver_only = 0;
553
prop->hw_queues_props[i].supports_sync_stream = 1;
554
prop->hw_queues_props[i].cb_alloc_flags =
555
CB_ALLOC_KERNEL;
556
num_sync_stream_queues++;
557
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559
prop->hw_queues_props[i].driver_only = 1;
560
prop->hw_queues_props[i].supports_sync_stream = 0;
561
prop->hw_queues_props[i].cb_alloc_flags =
562
CB_ALLOC_KERNEL;
563
} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565
prop->hw_queues_props[i].driver_only = 0;
566
prop->hw_queues_props[i].supports_sync_stream = 0;
567
prop->hw_queues_props[i].cb_alloc_flags =
568
CB_ALLOC_USER;
569
570
}
571
prop->hw_queues_props[i].collective_mode =
572
get_collective_mode(hdev, i);
573
}
574
575
prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576
prop->cfg_base_address = CFG_BASE;
577
prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578
prop->host_base_address = HOST_PHYS_BASE;
579
prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581
prop->completion_mode = HL_COMPLETION_MODE_JOB;
582
prop->collective_first_sob = 0;
583
prop->collective_first_mon = 0;
584
585
/* 2 SOBs per internal queue stream are reserved for collective */
586
prop->sync_stream_first_sob =
587
ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588
* QMAN_STREAMS * HL_RSVD_SOBS;
589
590
/* 1 monitor per internal queue stream are reserved for collective
591
* 2 monitors per external queue stream are reserved for collective
592
*/
593
prop->sync_stream_first_mon =
594
(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595
(NUMBER_OF_EXT_HW_QUEUES * 2);
596
597
prop->dram_base_address = DRAM_PHYS_BASE;
598
prop->dram_size = GAUDI_HBM_SIZE_32GB;
599
prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600
prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601
602
prop->sram_base_address = SRAM_BASE_ADDR;
603
prop->sram_size = SRAM_SIZE;
604
prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605
prop->sram_user_base_address =
606
prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607
608
prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609
prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610
611
prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612
if (hdev->pldm)
613
prop->mmu_pgt_size = 0x800000; /* 8MB */
614
else
615
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616
prop->mmu_pte_size = HL_PTE_SIZE;
617
prop->dram_page_size = PAGE_SIZE_2MB;
618
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619
prop->dram_supports_virtual_memory = false;
620
621
prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622
prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623
prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624
prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625
prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626
prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627
prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628
prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629
prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630
prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631
prop->pmmu.start_addr = VA_HOST_SPACE_START;
632
prop->pmmu.end_addr =
633
(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634
prop->pmmu.page_size = PAGE_SIZE_4KB;
635
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636
prop->pmmu.last_mask = LAST_MASK;
637
/* TODO: will be duplicated until implementing per-MMU props */
638
prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639
prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640
641
/* PMMU and HPMMU are the same except of page size */
642
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643
prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644
645
/* shifts and masks are the same in PMMU and DMMU */
646
memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648
prop->dmmu.end_addr = VA_HOST_SPACE_END;
649
prop->dmmu.page_size = PAGE_SIZE_2MB;
650
prop->dmmu.pgt_size = prop->mmu_pgt_size;
651
652
prop->cfg_size = CFG_SIZE;
653
prop->max_asid = MAX_ASID;
654
prop->num_of_events = GAUDI_EVENT_SIZE;
655
prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657
658
set_default_power_values(hdev);
659
660
prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661
prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662
663
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665
666
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667
CARD_NAME_MAX_LEN);
668
669
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670
671
prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672
prop->sync_stream_first_sob +
673
(num_sync_stream_queues * HL_RSVD_SOBS);
674
prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675
prop->sync_stream_first_mon +
676
(num_sync_stream_queues * HL_RSVD_MONS);
677
678
prop->first_available_user_interrupt = USHRT_MAX;
679
prop->tpc_interrupt_id = USHRT_MAX;
680
681
/* single msi */
682
prop->eq_interrupt_id = 0;
683
684
for (i = 0 ; i < HL_MAX_DCORES ; i++)
685
prop->first_available_cq[i] = USHRT_MAX;
686
687
prop->fw_cpu_boot_dev_sts0_valid = false;
688
prop->fw_cpu_boot_dev_sts1_valid = false;
689
prop->hard_reset_done_by_fw = false;
690
prop->gic_interrupts_enable = true;
691
692
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693
694
prop->clk_pll_index = HL_GAUDI_MME_PLL;
695
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696
697
prop->use_get_power_for_reset_history = true;
698
699
prop->configurable_stop_on_err = true;
700
701
prop->set_max_power_on_device_init = true;
702
703
prop->dma_mask = 48;
704
705
prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706
707
return 0;
708
}
709
710
static int gaudi_pci_bars_map(struct hl_device *hdev)
711
{
712
static const char * const name[] = {"SRAM", "CFG", "HBM"};
713
bool is_wc[3] = {false, false, true};
714
int rc;
715
716
rc = hl_pci_bars_map(hdev, name, is_wc);
717
if (rc)
718
return rc;
719
720
hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721
(CFG_BASE - SPI_FLASH_BASE_ADDR);
722
723
return 0;
724
}
725
726
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727
{
728
struct gaudi_device *gaudi = hdev->asic_specific;
729
struct hl_inbound_pci_region pci_region;
730
u64 old_addr = addr;
731
int rc;
732
733
if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734
return old_addr;
735
736
if (hdev->asic_prop.iatu_done_by_fw)
737
return U64_MAX;
738
739
/* Inbound Region 2 - Bar 4 - Point to HBM */
740
pci_region.mode = PCI_BAR_MATCH_MODE;
741
pci_region.bar = HBM_BAR_ID;
742
pci_region.addr = addr;
743
rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744
if (rc)
745
return U64_MAX;
746
747
if (gaudi) {
748
old_addr = gaudi->hbm_bar_cur_addr;
749
gaudi->hbm_bar_cur_addr = addr;
750
}
751
752
return old_addr;
753
}
754
755
static int gaudi_init_iatu(struct hl_device *hdev)
756
{
757
struct hl_inbound_pci_region inbound_region;
758
struct hl_outbound_pci_region outbound_region;
759
int rc;
760
761
if (hdev->asic_prop.iatu_done_by_fw)
762
return 0;
763
764
/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765
inbound_region.mode = PCI_BAR_MATCH_MODE;
766
inbound_region.bar = SRAM_BAR_ID;
767
inbound_region.addr = SRAM_BASE_ADDR;
768
rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769
if (rc)
770
goto done;
771
772
/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773
inbound_region.mode = PCI_BAR_MATCH_MODE;
774
inbound_region.bar = CFG_BAR_ID;
775
inbound_region.addr = SPI_FLASH_BASE_ADDR;
776
rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777
if (rc)
778
goto done;
779
780
/* Inbound Region 2 - Bar 4 - Point to HBM */
781
inbound_region.mode = PCI_BAR_MATCH_MODE;
782
inbound_region.bar = HBM_BAR_ID;
783
inbound_region.addr = DRAM_PHYS_BASE;
784
rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785
if (rc)
786
goto done;
787
788
/* Outbound Region 0 - Point to Host */
789
outbound_region.addr = HOST_PHYS_BASE;
790
outbound_region.size = HOST_PHYS_SIZE;
791
rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792
793
done:
794
return rc;
795
}
796
797
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798
{
799
return RREG32(mmHW_STATE);
800
}
801
802
static int gaudi_early_init(struct hl_device *hdev)
803
{
804
struct asic_fixed_properties *prop = &hdev->asic_prop;
805
struct pci_dev *pdev = hdev->pdev;
806
resource_size_t pci_bar_size;
807
u32 fw_boot_status;
808
int rc;
809
810
rc = gaudi_set_fixed_properties(hdev);
811
if (rc) {
812
dev_err(hdev->dev, "Failed setting fixed properties\n");
813
return rc;
814
}
815
816
/* Check BAR sizes */
817
pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818
819
if (pci_bar_size != SRAM_BAR_SIZE) {
820
dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821
SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822
rc = -ENODEV;
823
goto free_queue_props;
824
}
825
826
pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827
828
if (pci_bar_size != CFG_BAR_SIZE) {
829
dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830
CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831
rc = -ENODEV;
832
goto free_queue_props;
833
}
834
835
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836
hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837
838
/* If FW security is enabled at this point it means no access to ELBI */
839
if (hdev->asic_prop.fw_security_enabled) {
840
hdev->asic_prop.iatu_done_by_fw = true;
841
842
/*
843
* GIC-security-bit can ONLY be set by CPUCP, so in this stage
844
* decision can only be taken based on PCI ID security.
845
*/
846
hdev->asic_prop.gic_interrupts_enable = false;
847
goto pci_init;
848
}
849
850
rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851
&fw_boot_status);
852
if (rc)
853
goto free_queue_props;
854
855
/* Check whether FW is configuring iATU */
856
if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857
(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858
hdev->asic_prop.iatu_done_by_fw = true;
859
860
pci_init:
861
rc = hl_pci_init(hdev);
862
if (rc)
863
goto free_queue_props;
864
865
/* Before continuing in the initialization, we need to read the preboot
866
* version to determine whether we run with a security-enabled firmware
867
*/
868
rc = hl_fw_read_preboot_status(hdev);
869
if (rc) {
870
if (hdev->reset_on_preboot_fail)
871
/* we are already on failure flow, so don't check if hw_fini fails. */
872
hdev->asic_funcs->hw_fini(hdev, true, false);
873
goto pci_fini;
874
}
875
876
if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879
if (rc) {
880
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881
goto pci_fini;
882
}
883
}
884
885
return 0;
886
887
pci_fini:
888
hl_pci_fini(hdev);
889
free_queue_props:
890
kfree(hdev->asic_prop.hw_queues_props);
891
return rc;
892
}
893
894
static int gaudi_early_fini(struct hl_device *hdev)
895
{
896
kfree(hdev->asic_prop.hw_queues_props);
897
hl_pci_fini(hdev);
898
899
return 0;
900
}
901
902
/**
903
* gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904
*
905
* @hdev: pointer to hl_device structure
906
*
907
*/
908
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909
{
910
u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911
struct asic_fixed_properties *prop = &hdev->asic_prop;
912
u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913
int rc;
914
915
if ((hdev->fw_components & FW_TYPE_LINUX) &&
916
(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917
struct gaudi_device *gaudi = hdev->asic_specific;
918
919
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920
return 0;
921
922
rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923
924
if (rc)
925
return rc;
926
927
freq = pll_freq_arr[2];
928
} else {
929
/* Backward compatibility */
930
div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931
div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932
nr = RREG32(mmPSOC_CPU_PLL_NR);
933
nf = RREG32(mmPSOC_CPU_PLL_NF);
934
od = RREG32(mmPSOC_CPU_PLL_OD);
935
936
if (div_sel == DIV_SEL_REF_CLK ||
937
div_sel == DIV_SEL_DIVIDED_REF) {
938
if (div_sel == DIV_SEL_REF_CLK)
939
freq = PLL_REF_CLK;
940
else
941
freq = PLL_REF_CLK / (div_fctr + 1);
942
} else if (div_sel == DIV_SEL_PLL_CLK ||
943
div_sel == DIV_SEL_DIVIDED_PLL) {
944
pll_clk = PLL_REF_CLK * (nf + 1) /
945
((nr + 1) * (od + 1));
946
if (div_sel == DIV_SEL_PLL_CLK)
947
freq = pll_clk;
948
else
949
freq = pll_clk / (div_fctr + 1);
950
} else {
951
dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952
freq = 0;
953
}
954
}
955
956
prop->psoc_timestamp_frequency = freq;
957
prop->psoc_pci_pll_nr = nr;
958
prop->psoc_pci_pll_nf = nf;
959
prop->psoc_pci_pll_od = od;
960
prop->psoc_pci_pll_div_factor = div_fctr;
961
962
return 0;
963
}
964
965
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966
dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967
{
968
struct asic_fixed_properties *prop = &hdev->asic_prop;
969
struct packet_lin_dma *init_tpc_mem_pkt;
970
struct hl_cs_job *job;
971
struct hl_cb *cb;
972
u64 dst_addr;
973
u32 cb_size, ctl;
974
u8 tpc_id;
975
int rc;
976
977
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978
if (!cb)
979
return -EFAULT;
980
981
init_tpc_mem_pkt = cb->kernel_address;
982
cb_size = sizeof(*init_tpc_mem_pkt);
983
memset(init_tpc_mem_pkt, 0, cb_size);
984
985
init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986
987
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991
992
init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993
994
init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995
996
/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997
dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998
round_up(prop->sram_user_base_address, SZ_8K));
999
init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000
1001
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002
if (!job) {
1003
dev_err(hdev->dev, "Failed to allocate a new job\n");
1004
rc = -ENOMEM;
1005
goto release_cb;
1006
}
1007
1008
job->id = 0;
1009
job->user_cb = cb;
1010
atomic_inc(&job->user_cb->cs_cnt);
1011
job->user_cb_size = cb_size;
1012
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013
job->patched_cb = job->user_cb;
1014
job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015
1016
hl_debugfs_add_job(hdev, job);
1017
1018
rc = gaudi_send_job_on_qman0(hdev, job);
1019
1020
if (rc)
1021
goto free_job;
1022
1023
for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024
rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025
if (rc)
1026
break;
1027
}
1028
1029
free_job:
1030
hl_userptr_delete_list(hdev, &job->userptr_list);
1031
hl_debugfs_remove_job(hdev, job);
1032
kfree(job);
1033
atomic_dec(&cb->cs_cnt);
1034
1035
release_cb:
1036
hl_cb_put(cb);
1037
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038
1039
return rc;
1040
}
1041
1042
/*
1043
* gaudi_init_tpc_mem() - Initialize TPC memories.
1044
* @hdev: Pointer to hl_device structure.
1045
*
1046
* Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047
*
1048
* Return: 0 for success, negative value for error.
1049
*/
1050
static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051
{
1052
const struct firmware *fw;
1053
size_t fw_size;
1054
void *cpu_addr;
1055
dma_addr_t dma_handle;
1056
int rc, count = 5;
1057
1058
again:
1059
rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060
if (rc == -EINTR && count-- > 0) {
1061
msleep(50);
1062
goto again;
1063
}
1064
1065
if (rc) {
1066
dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067
GAUDI_TPC_FW_FILE);
1068
goto out;
1069
}
1070
1071
fw_size = fw->size;
1072
cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073
if (!cpu_addr) {
1074
dev_err(hdev->dev,
1075
"Failed to allocate %zu of dma memory for TPC kernel\n",
1076
fw_size);
1077
rc = -ENOMEM;
1078
goto out;
1079
}
1080
1081
memcpy(cpu_addr, fw->data, fw_size);
1082
1083
rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084
1085
hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086
1087
out:
1088
release_firmware(fw);
1089
return rc;
1090
}
1091
1092
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093
{
1094
struct gaudi_device *gaudi = hdev->asic_specific;
1095
struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096
struct hl_hw_queue *q;
1097
u32 i, sob_id, sob_group_id, queue_id;
1098
1099
/* Iterate through SOB groups and assign a SOB for each slave queue */
1100
sob_group_id =
1101
stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102
sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103
1104
queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105
for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106
q = &hdev->kernel_queues[queue_id + (4 * i)];
1107
q->sync_stream_prop.collective_sob_id = sob_id + i;
1108
}
1109
1110
/* Both DMA5 and TPC7 use the same resources since only a single
1111
* engine need to participate in the reduction process
1112
*/
1113
queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114
q = &hdev->kernel_queues[queue_id];
1115
q->sync_stream_prop.collective_sob_id =
1116
sob_id + NIC_NUMBER_OF_ENGINES;
1117
1118
queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119
q = &hdev->kernel_queues[queue_id];
1120
q->sync_stream_prop.collective_sob_id =
1121
sob_id + NIC_NUMBER_OF_ENGINES;
1122
}
1123
1124
static void gaudi_sob_group_hw_reset(struct kref *ref)
1125
{
1126
struct gaudi_hw_sob_group *hw_sob_group =
1127
container_of(ref, struct gaudi_hw_sob_group, kref);
1128
struct hl_device *hdev = hw_sob_group->hdev;
1129
int i;
1130
1131
for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132
WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133
(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134
1135
kref_init(&hw_sob_group->kref);
1136
}
1137
1138
static void gaudi_sob_group_reset_error(struct kref *ref)
1139
{
1140
struct gaudi_hw_sob_group *hw_sob_group =
1141
container_of(ref, struct gaudi_hw_sob_group, kref);
1142
struct hl_device *hdev = hw_sob_group->hdev;
1143
1144
dev_crit(hdev->dev,
1145
"SOB release shouldn't be called here, base_sob_id: %d\n",
1146
hw_sob_group->base_sob_id);
1147
}
1148
1149
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150
{
1151
struct gaudi_collective_properties *prop;
1152
int i;
1153
1154
prop = &gaudi->collective_props;
1155
1156
memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157
1158
for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159
if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160
prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161
BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162
/* Set collective engine bit */
1163
prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164
BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165
}
1166
1167
static int gaudi_collective_init(struct hl_device *hdev)
1168
{
1169
u32 i, sob_id, reserved_sobs_per_group;
1170
struct gaudi_collective_properties *prop;
1171
struct gaudi_device *gaudi;
1172
1173
gaudi = hdev->asic_specific;
1174
prop = &gaudi->collective_props;
1175
sob_id = hdev->asic_prop.collective_first_sob;
1176
1177
/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178
reserved_sobs_per_group =
1179
ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180
1181
/* Init SOB groups */
1182
for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183
prop->hw_sob_group[i].hdev = hdev;
1184
prop->hw_sob_group[i].base_sob_id = sob_id;
1185
sob_id += reserved_sobs_per_group;
1186
gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187
}
1188
1189
for (i = 0 ; i < QMAN_STREAMS; i++) {
1190
prop->next_sob_group_val[i] = 1;
1191
prop->curr_sob_group_idx[i] = 0;
1192
gaudi_collective_map_sobs(hdev, i);
1193
}
1194
1195
gaudi_collective_mstr_sob_mask_set(gaudi);
1196
1197
return 0;
1198
}
1199
1200
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201
{
1202
struct gaudi_device *gaudi = hdev->asic_specific;
1203
struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204
1205
kref_put(&cprop->hw_sob_group[sob_group].kref,
1206
gaudi_sob_group_hw_reset);
1207
}
1208
1209
static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210
struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211
{
1212
u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213
struct gaudi_collective_properties *cprop;
1214
struct hl_gen_wait_properties wait_prop;
1215
struct hl_sync_stream_properties *prop;
1216
struct gaudi_device *gaudi;
1217
1218
gaudi = hdev->asic_specific;
1219
cprop = &gaudi->collective_props;
1220
queue_id = job->hw_queue_id;
1221
prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222
1223
master_sob_base =
1224
cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225
master_monitor = prop->collective_mstr_mon_id[0];
1226
1227
cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228
1229
dev_dbg(hdev->dev,
1230
"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231
master_sob_base, cprop->mstr_sob_mask[0],
1232
cprop->next_sob_group_val[stream],
1233
master_monitor, queue_id);
1234
1235
wait_prop.data = (void *) job->patched_cb;
1236
wait_prop.sob_base = master_sob_base;
1237
wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238
wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239
wait_prop.mon_id = master_monitor;
1240
wait_prop.q_idx = queue_id;
1241
wait_prop.size = cb_size;
1242
cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243
1244
master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245
master_monitor = prop->collective_mstr_mon_id[1];
1246
1247
dev_dbg(hdev->dev,
1248
"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249
master_sob_base, cprop->mstr_sob_mask[1],
1250
cprop->next_sob_group_val[stream],
1251
master_monitor, queue_id);
1252
1253
wait_prop.sob_base = master_sob_base;
1254
wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255
wait_prop.mon_id = master_monitor;
1256
wait_prop.size = cb_size;
1257
cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258
}
1259
1260
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261
struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262
{
1263
struct hl_gen_wait_properties wait_prop;
1264
struct hl_sync_stream_properties *prop;
1265
u32 queue_id, cb_size = 0;
1266
1267
queue_id = job->hw_queue_id;
1268
prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269
1270
if (job->cs->encaps_signals) {
1271
/* use the encaps signal handle store earlier in the flow
1272
* and set the SOB information from the encaps
1273
* signals handle
1274
*/
1275
hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276
cs_cmpl);
1277
1278
dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1279
job->cs->sequence,
1280
cs_cmpl->hw_sob->sob_id,
1281
cs_cmpl->sob_val);
1282
}
1283
1284
/* Add to wait CBs using slave monitor */
1285
wait_prop.data = (void *) job->user_cb;
1286
wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287
wait_prop.sob_mask = 0x1;
1288
wait_prop.sob_val = cs_cmpl->sob_val;
1289
wait_prop.mon_id = prop->collective_slave_mon_id;
1290
wait_prop.q_idx = queue_id;
1291
wait_prop.size = cb_size;
1292
1293
dev_dbg(hdev->dev,
1294
"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296
prop->collective_slave_mon_id, queue_id);
1297
1298
cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299
1300
dev_dbg(hdev->dev,
1301
"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302
prop->collective_sob_id, queue_id);
1303
1304
cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305
prop->collective_sob_id, cb_size, false);
1306
}
1307
1308
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1309
{
1310
struct hl_cs_compl *signal_cs_cmpl =
1311
container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1312
struct hl_cs_compl *cs_cmpl =
1313
container_of(cs->fence, struct hl_cs_compl, base_fence);
1314
struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1315
struct gaudi_collective_properties *cprop;
1316
u32 stream, queue_id, sob_group_offset;
1317
struct gaudi_device *gaudi;
1318
struct hl_device *hdev;
1319
struct hl_cs_job *job;
1320
struct hl_ctx *ctx;
1321
1322
ctx = cs->ctx;
1323
hdev = ctx->hdev;
1324
gaudi = hdev->asic_specific;
1325
cprop = &gaudi->collective_props;
1326
1327
if (cs->encaps_signals) {
1328
cs_cmpl->hw_sob = handle->hw_sob;
1329
/* at this checkpoint we only need the hw_sob pointer
1330
* for the completion check before start going over the jobs
1331
* of the master/slaves, the sob_value will be taken later on
1332
* in gaudi_collective_slave_init_job depends on each
1333
* job wait offset value.
1334
*/
1335
cs_cmpl->sob_val = 0;
1336
} else {
1337
/* copy the SOB id and value of the signal CS */
1338
cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1339
cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1340
}
1341
1342
/* check again if the signal cs already completed.
1343
* if yes then don't send any wait cs since the hw_sob
1344
* could be in reset already. if signal is not completed
1345
* then get refcount to hw_sob to prevent resetting the sob
1346
* while wait cs is not submitted.
1347
* note that this check is protected by two locks,
1348
* hw queue lock and completion object lock,
1349
* and the same completion object lock also protects
1350
* the hw_sob reset handler function.
1351
* The hw_queue lock prevent out of sync of hw_sob
1352
* refcount value, changed by signal/wait flows.
1353
*/
1354
spin_lock(&signal_cs_cmpl->lock);
1355
1356
if (completion_done(&cs->signal_fence->completion)) {
1357
spin_unlock(&signal_cs_cmpl->lock);
1358
return -EINVAL;
1359
}
1360
/* Increment kref since all slave queues are now waiting on it */
1361
kref_get(&cs_cmpl->hw_sob->kref);
1362
1363
spin_unlock(&signal_cs_cmpl->lock);
1364
1365
/* Calculate the stream from collective master queue (1st job) */
1366
job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1367
stream = job->hw_queue_id % 4;
1368
sob_group_offset =
1369
stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1370
1371
list_for_each_entry(job, &cs->job_list, cs_node) {
1372
queue_id = job->hw_queue_id;
1373
1374
if (hdev->kernel_queues[queue_id].collective_mode ==
1375
HL_COLLECTIVE_MASTER)
1376
gaudi_collective_master_init_job(hdev, job, stream,
1377
sob_group_offset);
1378
else
1379
gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1380
}
1381
1382
cs_cmpl->sob_group = sob_group_offset;
1383
1384
/* Handle sob group kref and wraparound */
1385
kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1386
cprop->next_sob_group_val[stream]++;
1387
1388
if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1389
/*
1390
* Decrement as we reached the max value.
1391
* The release function won't be called here as we've
1392
* just incremented the refcount.
1393
*/
1394
kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1395
gaudi_sob_group_reset_error);
1396
cprop->next_sob_group_val[stream] = 1;
1397
/* only two SOBs are currently in use */
1398
cprop->curr_sob_group_idx[stream] =
1399
(cprop->curr_sob_group_idx[stream] + 1) &
1400
(HL_RSVD_SOBS - 1);
1401
1402
gaudi_collective_map_sobs(hdev, stream);
1403
1404
dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1405
cprop->curr_sob_group_idx[stream], stream);
1406
}
1407
1408
mb();
1409
hl_fence_put(cs->signal_fence);
1410
cs->signal_fence = NULL;
1411
1412
return 0;
1413
}
1414
1415
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416
{
1417
u32 cacheline_end, additional_commands;
1418
1419
cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420
additional_commands = sizeof(struct packet_msg_prot) * 2;
1421
1422
if (user_cb_size + additional_commands > cacheline_end)
1423
return cacheline_end - user_cb_size + additional_commands;
1424
else
1425
return additional_commands;
1426
}
1427
1428
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429
struct hl_ctx *ctx, struct hl_cs *cs,
1430
enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431
u32 encaps_signal_offset)
1432
{
1433
struct hw_queue_properties *hw_queue_prop;
1434
struct hl_cs_counters_atomic *cntr;
1435
struct hl_cs_job *job;
1436
struct hl_cb *cb;
1437
u32 cb_size;
1438
bool patched_cb;
1439
1440
cntr = &hdev->aggregated_cs_counters;
1441
1442
if (mode == HL_COLLECTIVE_MASTER) {
1443
/* CB size of collective master queue contains
1444
* 4 msg short packets for monitor 1 configuration
1445
* 1 fence packet
1446
* 4 msg short packets for monitor 2 configuration
1447
* 1 fence packet
1448
* 2 msg prot packets for completion and MSI
1449
*/
1450
cb_size = sizeof(struct packet_msg_short) * 8 +
1451
sizeof(struct packet_fence) * 2 +
1452
sizeof(struct packet_msg_prot) * 2;
1453
patched_cb = true;
1454
} else {
1455
/* CB size of collective slave queues contains
1456
* 4 msg short packets for monitor configuration
1457
* 1 fence packet
1458
* 1 additional msg short packet for sob signal
1459
*/
1460
cb_size = sizeof(struct packet_msg_short) * 5 +
1461
sizeof(struct packet_fence);
1462
patched_cb = false;
1463
}
1464
1465
hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466
job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467
if (!job) {
1468
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469
atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470
dev_err(hdev->dev, "Failed to allocate a new job\n");
1471
return -ENOMEM;
1472
}
1473
1474
/* Allocate internal mapped CB for non patched CBs */
1475
cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1476
if (!cb) {
1477
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1478
atomic64_inc(&cntr->out_of_mem_drop_cnt);
1479
kfree(job);
1480
return -EFAULT;
1481
}
1482
1483
job->id = 0;
1484
job->cs = cs;
1485
job->user_cb = cb;
1486
atomic_inc(&job->user_cb->cs_cnt);
1487
job->user_cb_size = cb_size;
1488
job->hw_queue_id = queue_id;
1489
1490
/* since its guaranteed to have only one chunk in the collective wait
1491
* cs, we can use this chunk to set the encapsulated signal offset
1492
* in the jobs.
1493
*/
1494
if (cs->encaps_signals)
1495
job->encaps_sig_wait_offset = encaps_signal_offset;
1496
1497
/*
1498
* No need in parsing, user CB is the patched CB.
1499
* We call hl_cb_destroy() out of two reasons - we don't need
1500
* the CB in the CB idr anymore and to decrement its refcount as
1501
* it was incremented inside hl_cb_kernel_create().
1502
*/
1503
if (patched_cb)
1504
job->patched_cb = job->user_cb;
1505
else
1506
job->patched_cb = NULL;
1507
1508
job->job_cb_size = job->user_cb_size;
1509
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1510
1511
/* increment refcount as for external queues we get completion */
1512
if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1513
cs_get(cs);
1514
1515
cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1516
1517
list_add_tail(&job->cs_node, &cs->job_list);
1518
1519
hl_debugfs_add_job(hdev, job);
1520
1521
return 0;
1522
}
1523
1524
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* The first job goes to the collective master queue, which will wait
	 * for the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

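/*
 * gaudi_late_init() completes the initialization steps that require working
 * communication with the device CPU: it fetches the cpucp info, trims the
 * NIC port mask on PCI (8-port) cards, enables PCI access for the device
 * CPU, scrubs SRAM/DRAM, clears the MMU page tables range, initializes the
 * TPC memories, sets up the collective engine and prepares the MMU for the
 * single user ASID (1). If any step after enabling PCI access fails, PCI
 * access is disabled again before returning.
 */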
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

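/*
 * Allocate the CPU-accessible DMA region. The allocation is retried up to
 * GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until an allocation is found whose
 * first and last byte share the same PCI MSB bits (bits 49:39), i.e. a
 * range that does not cross a 2^39 (512GB) boundary. Rejected attempts are
 * freed before returning.
 */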
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

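/*
 * Free the internal QMANs' PQ buffers. Only entries with a non-NULL
 * pq_kernel_addr are freed, so this is also safe to call from the
 * allocation error path below.
 */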
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}

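/*
 * Allocate kernel memory for the PQs of all internal (QUEUE_TYPE_INT)
 * queues. The PQ size depends on the owning engine class: HBM DMA, MME,
 * TPC or NIC. On any failure, everything allocated so far is released.
 */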
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

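/*
 * Describe the PCI-accessible memory regions: CFG and SP SRAM are exposed
 * through the CFG BAR (offsets relative to SPI_FLASH_BASE_ADDR), SRAM
 * through the SRAM BAR, and DRAM (HBM) through the HBM BAR.
 */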
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}

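/*
 * S/W init allocates everything the driver needs before touching the H/W:
 * the ASIC-specific structure and its event map, the small-allocations DMA
 * pool, the CPU-accessible DMA region and its gen_pool, and the internal
 * QMANs' PQ buffers. gaudi_sw_fini() releases the same resources in
 * reverse order.
 */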
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
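/*
 * The mapping implemented below: for the CPU EQ, and for any index below
 * GAUDI_EVENT_QUEUE_MSI_IDX, the MSI vector equals the requested index;
 * any other index is shifted up by NIC_NUMBER_OF_ENGINES + 1 so that it
 * lands after the CPU and NIC vectors.
 */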
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
			GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

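/*
 * Gaudi is driven with a single MSI vector: gaudi_irq_handler_single()
 * serves all completion queues and the event queue from that one
 * interrupt, so only one vector is requested from the PCI core here.
 */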
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	rc = gaudi_enable_msi_single(hdev);
	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);
	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

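/*
 * The SRAM and HBM scramblers are configured by the driver only when
 * firmware security is disabled and the firmware has not already enabled
 * them (as reported by the boot device-status bits). The result is cached
 * in hw_cap_initialized so the registers are only written once.
 */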
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

static void gaudi_init_e2e(struct hl_device *hdev)
2205
{
2206
if (hdev->asic_prop.fw_security_enabled)
2207
return;
2208
2209
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2210
CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2211
return;
2212
2213
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2214
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2215
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2216
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2217
2218
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2219
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2220
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2221
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2222
2223
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2224
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2225
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2226
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2227
2228
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2229
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2230
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2231
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2232
2233
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2234
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2235
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2236
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2237
2238
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2239
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2240
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2241
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2242
2243
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2244
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2245
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2246
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2247
2248
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2249
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2250
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2251
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2252
2253
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2254
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2255
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2256
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2257
2258
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2259
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2260
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2261
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2262
2263
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2264
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2265
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2266
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2267
2268
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2269
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2270
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2271
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2272
2273
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2274
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2275
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2276
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2277
2278
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2279
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2280
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2281
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2282
2283
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2284
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2285
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2286
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2287
2288
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2289
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2290
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2291
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2292
2293
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2294
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2295
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2296
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2297
2298
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2299
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2300
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2301
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2302
2303
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2304
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2305
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2306
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2307
2308
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2309
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2310
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2311
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2312
2313
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2314
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2315
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2316
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2317
2318
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2319
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2320
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2321
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2322
2323
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2324
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2325
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2326
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2327
2328
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2329
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2330
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2331
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2332
2333
WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2334
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2335
WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2336
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2337
2338
WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2339
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2340
WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2341
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2342
2343
WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2344
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2345
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2346
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2347
2348
WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2349
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2350
WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2351
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2352
2353
WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2354
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2355
WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2356
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2357
2358
WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2359
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2360
WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2361
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2362
2363
WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2364
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2365
WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2366
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2367
2368
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2369
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2370
WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2371
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2372
2373
WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2374
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2375
WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2376
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2377
2378
WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2379
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2380
WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2381
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2382
2383
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2384
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2385
WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2386
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2387
2388
WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2389
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2390
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2391
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2392
2393
WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2394
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395
WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2396
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397
2398
WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2399
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400
WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2401
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402
2403
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2404
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405
WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2406
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407
2408
WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2409
1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410
WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2411
1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412
2413
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2414
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2415
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2416
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2417
2418
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2419
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2420
WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2421
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2422
2423
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2424
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2425
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2426
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2427
2428
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2429
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2430
WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2431
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2432
2433
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2434
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2435
WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2436
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2437
2438
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2439
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2440
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2441
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2442
2443
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2444
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2445
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2446
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2447
2448
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2449
1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2450
WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2451
1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2452
}
2453
2454
static void gaudi_init_hbm_cred(struct hl_device *hdev)
2455
{
2456
u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2457
2458
if (hdev->asic_prop.fw_security_enabled)
2459
return;
2460
2461
if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2462
CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2463
return;
2464
2465
hbm0_wr = 0x33333333;
2466
hbm0_rd = 0x77777777;
2467
hbm1_wr = 0x55555555;
2468
hbm1_rd = 0xDDDDDDDD;
2469
2470
WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2471
WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2472
WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2473
WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2474
2475
WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2476
WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2477
WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2478
WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2479
2480
WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2481
WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2482
WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2483
WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2484
2485
WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2486
WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2487
WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2488
WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2489
2490
WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2491
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2494
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496
WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2497
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2500
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502
2503
WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2504
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2507
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509
WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2510
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512
WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2513
(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514
(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515
}
2516
2517
static void gaudi_init_golden_registers(struct hl_device *hdev)
2518
{
2519
u32 tpc_offset;
2520
int tpc_id, i;
2521
2522
gaudi_init_e2e(hdev);
2523
gaudi_init_hbm_cred(hdev);
2524
2525
for (tpc_id = 0, tpc_offset = 0;
2526
tpc_id < TPC_NUMBER_OF_ENGINES;
2527
tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2528
/* Mask all arithmetic interrupts from TPC */
2529
WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2530
/* Set 16 cache lines */
2531
WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2532
ICACHE_FETCH_LINE_NUM, 2);
2533
}
2534
2535
/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2536
for (i = 0 ; i < 128 ; i += 8)
2537
writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2538
2539
WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540
WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541
WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542
WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543
}
2544
2545
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2546
int qman_id, dma_addr_t qman_pq_addr)
2547
{
2548
struct cpu_dyn_regs *dyn_regs =
2549
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2550
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2551
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2552
u32 q_off, dma_qm_offset;
2553
u32 dma_qm_err_cfg, irq_handler_offset;
2554
2555
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2556
2557
mtr_base_en_lo = lower_32_bits(CFG_BASE +
2558
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2559
mtr_base_en_hi = upper_32_bits(CFG_BASE +
2560
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561
so_base_en_lo = lower_32_bits(CFG_BASE +
2562
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2563
so_base_en_hi = upper_32_bits(CFG_BASE +
2564
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2566
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2567
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2568
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569
so_base_ws_lo = lower_32_bits(CFG_BASE +
2570
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2571
so_base_ws_hi = upper_32_bits(CFG_BASE +
2572
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573
2574
q_off = dma_qm_offset + qman_id * 4;
2575
2576
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2577
WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2578
2579
WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2580
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2581
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2582
2583
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2584
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2585
QMAN_LDMA_SRC_OFFSET);
2586
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2587
QMAN_LDMA_DST_OFFSET);
2588
2589
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2590
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2591
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2592
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2593
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2594
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2595
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2596
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2597
2598
WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2599
2600
/* The following configuration is needed only once per QMAN */
2601
if (qman_id == 0) {
2602
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2603
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2604
le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2605
2606
/* Configure RAZWI IRQ */
2607
dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2608
if (hdev->stop_on_err)
2609
dma_qm_err_cfg |=
2610
PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2611
2612
WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2613
2614
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2615
lower_32_bits(CFG_BASE + irq_handler_offset));
2616
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2617
upper_32_bits(CFG_BASE + irq_handler_offset));
2618
2619
WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2620
gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2621
dma_id);
2622
2623
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2624
QM_ARB_ERR_MSG_EN_MASK);
2625
2626
/* Set timeout to maximum */
2627
WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2628
2629
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630
QMAN_EXTERNAL_MAKE_TRUSTED);
2631
2632
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2633
}
2634
}
2635
2636
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2637
{
2638
struct cpu_dyn_regs *dyn_regs =
2639
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640
u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642
u32 irq_handler_offset;
2643
2644
/* Set to maximum possible according to physical size */
2645
WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646
WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2647
2648
/* WA for H/W bug H3-2116 */
2649
WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2650
2651
/* STOP_ON bit implies no completion to operation in case of RAZWI */
2652
if (hdev->stop_on_err)
2653
dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2654
2655
WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2656
2657
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659
le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2660
2661
WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662
lower_32_bits(CFG_BASE + irq_handler_offset));
2663
WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664
upper_32_bits(CFG_BASE + irq_handler_offset));
2665
2666
WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667
gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668
WREG32(mmDMA0_CORE_PROT + dma_offset,
2669
1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670
/* If the channel is secured, it should be in MMU bypass mode */
2671
WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672
1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673
WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2674
}
2675
2676
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677
u32 enable_mask)
2678
{
2679
u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680
2681
WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682
}
2683
2684
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685
{
2686
struct gaudi_device *gaudi = hdev->asic_specific;
2687
struct hl_hw_queue *q;
2688
int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689
2690
if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691
return;
2692
2693
for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694
dma_id = gaudi_dma_assignment[i];
2695
/*
2696
* For queues after the CPU Q need to add 1 to get the correct
2697
* queue. In addition, need to add the CPU EQ and NIC IRQs in
2698
* order to get the correct MSI register.
2699
*/
2700
if (dma_id > 1) {
2701
cpu_skip = 1;
2702
nic_skip = NIC_NUMBER_OF_ENGINES;
2703
} else {
2704
cpu_skip = 0;
2705
nic_skip = 0;
2706
}
2707
2708
for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709
q_idx = 4 * dma_id + j + cpu_skip;
2710
q = &hdev->kernel_queues[q_idx];
2711
q->cq_id = cq_id++;
2712
q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713
gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714
q->bus_address);
2715
}
2716
2717
gaudi_init_dma_core(hdev, dma_id);
2718
2719
gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720
}
2721
2722
gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723
}
2724
2725
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726
int qman_id, u64 qman_base_addr)
2727
{
2728
struct cpu_dyn_regs *dyn_regs =
2729
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732
u32 dma_qm_err_cfg, irq_handler_offset;
2733
u32 q_off, dma_qm_offset;
2734
2735
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2736
2737
mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739
mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741
so_base_en_lo = lower_32_bits(CFG_BASE +
2742
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743
so_base_en_hi = upper_32_bits(CFG_BASE +
2744
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749
so_base_ws_lo = lower_32_bits(CFG_BASE +
2750
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
so_base_ws_hi = upper_32_bits(CFG_BASE +
2752
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753
2754
q_off = dma_qm_offset + qman_id * 4;
2755
2756
if (qman_id < 4) {
2757
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758
lower_32_bits(qman_base_addr));
2759
WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760
upper_32_bits(qman_base_addr));
2761
2762
WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763
WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764
WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2765
2766
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767
QMAN_CPDMA_SIZE_OFFSET);
2768
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769
QMAN_CPDMA_SRC_OFFSET);
2770
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771
QMAN_CPDMA_DST_OFFSET);
2772
} else {
2773
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775
le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2776
2777
WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778
QMAN_LDMA_SIZE_OFFSET);
2779
WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780
QMAN_LDMA_SRC_OFFSET);
2781
WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782
QMAN_LDMA_DST_OFFSET);
2783
2784
/* Configure RAZWI IRQ */
2785
dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786
if (hdev->stop_on_err)
2787
dma_qm_err_cfg |=
2788
HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789
2790
WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791
2792
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793
lower_32_bits(CFG_BASE + irq_handler_offset));
2794
WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795
upper_32_bits(CFG_BASE + irq_handler_offset));
2796
2797
WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798
gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799
dma_id);
2800
2801
WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802
QM_ARB_ERR_MSG_EN_MASK);
2803
2804
/* Set timeout to maximum */
2805
WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2806
2807
WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2808
WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2809
QMAN_INTERNAL_MAKE_TRUSTED);
2810
}
2811
2812
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2813
WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2814
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2815
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2816
2817
/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2818
if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2819
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2820
mtr_base_ws_lo);
2821
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2822
mtr_base_ws_hi);
2823
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2824
so_base_ws_lo);
2825
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2826
so_base_ws_hi);
2827
}
2828
}
2829
2830
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2831
{
2832
struct gaudi_device *gaudi = hdev->asic_specific;
2833
struct gaudi_internal_qman_info *q;
2834
u64 qman_base_addr;
2835
int i, j, dma_id, internal_q_index;
2836
2837
if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2838
return;
2839
2840
for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2841
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2842
2843
for (j = 0 ; j < QMAN_STREAMS ; j++) {
2844
/*
2845
* Add the CPU queue in order to get the correct queue
2846
* number as all internal queue are placed after it
2847
*/
2848
internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2849
2850
q = &gaudi->internal_qmans[internal_q_index];
2851
qman_base_addr = (u64) q->pq_dma_addr;
2852
gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2853
qman_base_addr);
2854
}
2855
2856
/* Initializing lower CP for HBM DMA QMAN */
2857
gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2858
2859
gaudi_init_dma_core(hdev, dma_id);
2860
2861
gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2862
}
2863
2864
gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2865
}
2866
2867
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2868
int qman_id, u64 qman_base_addr)
2869
{
2870
struct cpu_dyn_regs *dyn_regs =
2871
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2872
u32 mtr_base_lo, mtr_base_hi;
2873
u32 so_base_lo, so_base_hi;
2874
u32 irq_handler_offset;
2875
u32 q_off, mme_id;
2876
u32 mme_qm_err_cfg;
2877
2878
mtr_base_lo = lower_32_bits(CFG_BASE +
2879
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2880
mtr_base_hi = upper_32_bits(CFG_BASE +
2881
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882
so_base_lo = lower_32_bits(CFG_BASE +
2883
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2884
so_base_hi = upper_32_bits(CFG_BASE +
2885
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886
2887
q_off = mme_offset + qman_id * 4;
2888
2889
if (qman_id < 4) {
2890
WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2891
lower_32_bits(qman_base_addr));
2892
WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2893
upper_32_bits(qman_base_addr));
2894
2895
WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2896
WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2897
WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2898
2899
WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2900
QMAN_CPDMA_SIZE_OFFSET);
2901
WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2902
QMAN_CPDMA_SRC_OFFSET);
2903
WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2904
QMAN_CPDMA_DST_OFFSET);
2905
} else {
2906
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2907
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2908
le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2909
2910
WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2911
QMAN_LDMA_SIZE_OFFSET);
2912
WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2913
QMAN_LDMA_SRC_OFFSET);
2914
WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2915
QMAN_LDMA_DST_OFFSET);
2916
2917
/* Configure RAZWI IRQ */
2918
mme_id = mme_offset /
2919
(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2920
2921
mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2922
if (hdev->stop_on_err)
2923
mme_qm_err_cfg |=
2924
MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2925
2926
WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2927
2928
WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2929
lower_32_bits(CFG_BASE + irq_handler_offset));
2930
WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2931
upper_32_bits(CFG_BASE + irq_handler_offset));
2932
2933
WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2934
gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2935
mme_id);
2936
2937
WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2938
QM_ARB_ERR_MSG_EN_MASK);
2939
2940
/* Set timeout to maximum */
2941
WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2942
2943
WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944
WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945
QMAN_INTERNAL_MAKE_TRUSTED);
2946
}
2947
2948
WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949
WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950
WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951
WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952
}
2953
2954
static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955
{
2956
struct gaudi_device *gaudi = hdev->asic_specific;
2957
struct gaudi_internal_qman_info *q;
2958
u64 qman_base_addr;
2959
u32 mme_offset;
2960
int i, internal_q_index;
2961
2962
if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963
return;
2964
2965
/*
2966
* map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967
* and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968
*/
2969
2970
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972
for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973
internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974
q = &gaudi->internal_qmans[internal_q_index];
2975
qman_base_addr = (u64) q->pq_dma_addr;
2976
gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977
qman_base_addr);
2978
if (i == 3)
2979
mme_offset = 0;
2980
}
2981
2982
/* Initializing lower CP for MME QMANs */
2983
mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984
gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985
gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987
WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988
WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990
gaudi->hw_cap_initialized |= HW_CAP_MME;
2991
}
2992
2993
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994
int qman_id, u64 qman_base_addr)
2995
{
2996
struct cpu_dyn_regs *dyn_regs =
2997
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998
u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999
u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000
u32 tpc_qm_err_cfg, irq_handler_offset;
3001
u32 q_off, tpc_id;
3002
3003
mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005
mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007
so_base_en_lo = lower_32_bits(CFG_BASE +
3008
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009
so_base_en_hi = upper_32_bits(CFG_BASE +
3010
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011
mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013
mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015
so_base_ws_lo = lower_32_bits(CFG_BASE +
3016
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017
so_base_ws_hi = upper_32_bits(CFG_BASE +
3018
mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020
q_off = tpc_offset + qman_id * 4;
3021
3022
tpc_id = tpc_offset /
3023
(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025
if (qman_id < 4) {
3026
WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027
lower_32_bits(qman_base_addr));
3028
WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029
upper_32_bits(qman_base_addr));
3030
3031
WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032
WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033
WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035
WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036
QMAN_CPDMA_SIZE_OFFSET);
3037
WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038
QMAN_CPDMA_SRC_OFFSET);
3039
WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040
QMAN_CPDMA_DST_OFFSET);
3041
} else {
3042
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044
le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046
WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047
QMAN_LDMA_SIZE_OFFSET);
3048
WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049
QMAN_LDMA_SRC_OFFSET);
3050
WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051
QMAN_LDMA_DST_OFFSET);
3052
3053
/* Configure RAZWI IRQ */
3054
tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055
if (hdev->stop_on_err)
3056
tpc_qm_err_cfg |=
3057
TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059
WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061
WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062
lower_32_bits(CFG_BASE + irq_handler_offset));
3063
WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064
upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066
WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067
gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068
tpc_id);
3069
3070
WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071
QM_ARB_ERR_MSG_EN_MASK);
3072
3073
/* Set timeout to maximum */
3074
WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3075
3076
WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3077
WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3078
QMAN_INTERNAL_MAKE_TRUSTED);
3079
}
3080
3081
WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3082
WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3083
WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3084
WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3085
3086
/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3087
if (tpc_id == 6) {
3088
WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3089
mtr_base_ws_lo);
3090
WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3091
mtr_base_ws_hi);
3092
WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3093
so_base_ws_lo);
3094
WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3095
so_base_ws_hi);
3096
}
3097
}
3098
3099
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3100
{
3101
struct gaudi_device *gaudi = hdev->asic_specific;
3102
struct gaudi_internal_qman_info *q;
3103
u64 qman_base_addr;
3104
u32 so_base_hi, tpc_offset = 0;
3105
u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3106
mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3107
int i, tpc_id, internal_q_index;
3108
3109
if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3110
return;
3111
3112
so_base_hi = upper_32_bits(CFG_BASE +
3113
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114
3115
for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3116
for (i = 0 ; i < QMAN_STREAMS ; i++) {
3117
internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3118
tpc_id * QMAN_STREAMS + i;
3119
q = &gaudi->internal_qmans[internal_q_index];
3120
qman_base_addr = (u64) q->pq_dma_addr;
3121
gaudi_init_tpc_qman(hdev, tpc_offset, i,
3122
qman_base_addr);
3123
3124
if (i == 3) {
3125
/* Initializing lower CP for TPC QMAN */
3126
gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3127
3128
/* Enable the QMAN and TPC channel */
3129
WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3130
QMAN_TPC_ENABLE);
3131
}
3132
}
3133
3134
WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3135
so_base_hi);
3136
3137
tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3138
3139
gaudi->hw_cap_initialized |=
3140
FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3141
}
3142
}
3143
3144
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
								nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}

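/*
 * Each NIC macro contains two QMANs (QM0/QM1). The loops below advance by
 * the QM0->QM1 delta after every port and, after every odd port, rewind two
 * QMAN deltas and add the NIC0->NIC1 delta to land on the next macro.
 * Assuming the register-map deltas behave this way, the walk is, e.g.,
 * port 0 -> NIC0_QM0, port 1 -> NIC0_QM1, port 2 -> NIC1_QM0, and so on.
 */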
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}

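/* Disable the PCI DMA QMANs (channels 0, 1 and 5) by clearing GLBL_CFG0 */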
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}

static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}

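/*
 * The "stop" helpers below only halt the QMAN CPs through GLBL_CFG1; the
 * QMANs themselves are disabled later by clearing GLBL_CFG0 (see the
 * gaudi_disable_*_qmans helpers above and gaudi_halt_engines below).
 */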
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419
{
3420
struct gaudi_device *gaudi = hdev->asic_specific;
3421
3422
/* Stop upper CPs of QMANs */
3423
3424
if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425
WREG32(mmNIC0_QM0_GLBL_CFG1,
3426
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429
3430
if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431
WREG32(mmNIC0_QM1_GLBL_CFG1,
3432
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435
3436
if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437
WREG32(mmNIC1_QM0_GLBL_CFG1,
3438
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441
3442
if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443
WREG32(mmNIC1_QM1_GLBL_CFG1,
3444
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447
3448
if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449
WREG32(mmNIC2_QM0_GLBL_CFG1,
3450
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453
3454
if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455
WREG32(mmNIC2_QM1_GLBL_CFG1,
3456
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459
3460
if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461
WREG32(mmNIC3_QM0_GLBL_CFG1,
3462
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465
3466
if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467
WREG32(mmNIC3_QM1_GLBL_CFG1,
3468
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471
3472
if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473
WREG32(mmNIC4_QM0_GLBL_CFG1,
3474
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477
3478
if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479
WREG32(mmNIC4_QM1_GLBL_CFG1,
3480
NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481
NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482
NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483
}
3484
3485
static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486
{
3487
struct gaudi_device *gaudi = hdev->asic_specific;
3488
3489
if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490
return;
3491
3492
WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493
WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494
WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495
}
3496
3497
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498
{
3499
struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501
if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502
return;
3503
3504
WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505
WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506
WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507
WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508
WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509
}
3510
3511
static void gaudi_mme_stall(struct hl_device *hdev)
3512
{
3513
struct gaudi_device *gaudi = hdev->asic_specific;
3514
3515
if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516
return;
3517
3518
/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520
WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522
WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524
WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526
WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528
WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530
WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532
WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534
WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535
}
3536
3537
static void gaudi_tpc_stall(struct hl_device *hdev)
3538
{
3539
struct gaudi_device *gaudi = hdev->asic_specific;
3540
3541
if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542
return;
3543
3544
WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545
WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546
WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547
WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548
WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549
WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550
WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551
WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552
}
3553
3554
static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
	}

	WREG32(mmMME0_QM_CGM_CFG, 0);
	WREG32(mmMME0_QM_CGM_CFG1, 0);
	WREG32(mmMME2_QM_CGM_CFG, 0);
	WREG32(mmMME2_QM_CGM_CFG1, 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
	}
}

static void gaudi_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

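/*
 * Halt order: stop the QMAN CPs, wait, stall the engine cores, wait again,
 * then disable the QMANs and the timestamp counter. When the reset is
 * performed by the F/W, only the MSI is disabled here.
 */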
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	msleep(wait_timeout_ms);

	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

skip_engines:
	gaudi_disable_msi(hdev);
}

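/*
 * MMU bring-up: program the hop-0 address of every ASID, set the cache
 * management page, invalidate the MMU cache and only then enable the MMU.
 */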
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->dmmu.hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);

	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	rc = hl_mmu_invalidate_cache(hdev, true, 0);
	if (rc)
		return rc;

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}

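/* F/W images are copied to the device directly through the HBM/SRAM BARs */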
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Here we set the initial values of a few specific dynamic regs.
	 * Before the first descriptor is read from the F/W these values must
	 * be hard-coded; in later stages of the protocol they are refreshed
	 * automatically from the F/W descriptor, so the data there is always
	 * up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
			cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
			cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
	static_loader->cpu_reset_wait_msec = hdev->pldm ?
			GAUDI_PLDM_RESET_WAIT_MSEC :
			GAUDI_CPU_RESET_WAIT_MSEC;
}

static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
}

static void gaudi_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = !hdev->bmc_enable;
	fw_loader->sram_bar_id = SRAM_BAR_ID;
	fw_loader->dram_bar_id = HBM_BAR_ID;

	if (prop->dynamic_fw_load)
		gaudi_init_dynamic_firmware_loader(hdev);
	else
		gaudi_init_static_firmware_loader(hdev);
}

static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40 bits addresses.
	 * This register sets the extension to 50 bits.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);

	rc = hl_fw_init_cpu(hdev);

	if (rc)
		return rc;

	gaudi->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

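/*
 * Hand the CPU-CP its PQ/EQ/CQ base addresses and sizes, signal it through
 * the PI-update interrupt and wait for PQ_INIT_STATUS_READY_FOR_HOST.
 */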
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}

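/*
 * H/W init order: CPU (F/W load), scramblers, golden registers, MMU,
 * security, DMA/MME/TPC/NIC QMANs, timestamp, MSI and finally the CPU
 * queues.
 */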
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 */
	gaudi_disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}

static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4004
{
4005
struct cpu_dyn_regs *dyn_regs =
4006
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007
u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4008
struct gaudi_device *gaudi = hdev->asic_specific;
4009
bool driver_performs_reset;
4010
4011
if (!hard_reset) {
4012
dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4013
return 0;
4014
}
4015
4016
if (hdev->pldm) {
4017
reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4018
cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4019
} else {
4020
reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4021
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4022
}
4023
4024
if (fw_reset) {
4025
dev_dbg(hdev->dev,
4026
"Firmware performs HARD reset, going to wait %dms\n",
4027
reset_timeout_ms);
4028
4029
goto skip_reset;
4030
}
4031
4032
driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4033
!hdev->asic_prop.hard_reset_done_by_fw);
4034
4035
/* Set device to handle FLR by H/W as we will put the device CPU to
4036
* halt mode
4037
*/
4038
if (driver_performs_reset)
4039
WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4040
PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4041
4042
/* If linux is loaded in the device CPU we need to communicate with it
4043
* via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044
* registers in case of old F/Ws
4045
*/
4046
if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4047
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4048
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4049
le32_to_cpu(dyn_regs->gic_host_halt_irq);
4050
4051
WREG32(irq_handler_offset,
4052
gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4053
4054
/* This is a hail-mary attempt to revive the card in the small chance that the
4055
* f/w has experienced a watchdog event, which caused it to return back to preboot.
4056
* In that case, triggering reset through GIC won't help. We need to trigger the
4057
* reset as if Linux wasn't loaded.
4058
*
4059
* We do it only if the reset cause was HB, because that would be the indication
4060
* of such an event.
4061
*
4062
* In case watchdog hasn't expired but we still got HB, then this won't do any
4063
* damage.
4064
*/
4065
if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4066
if (hdev->asic_prop.hard_reset_done_by_fw)
4067
hl_fw_ask_hard_reset_without_linux(hdev);
4068
else
4069
hl_fw_ask_halt_machine_without_linux(hdev);
4070
}
4071
} else {
4072
if (hdev->asic_prop.hard_reset_done_by_fw)
4073
hl_fw_ask_hard_reset_without_linux(hdev);
4074
else
4075
hl_fw_ask_halt_machine_without_linux(hdev);
4076
}
4077
4078
if (driver_performs_reset) {
4079
4080
/* Configure the reset registers. Must be done as early as
4081
* possible in case we fail during H/W initialization
4082
*/
4083
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4084
(CFG_RST_H_DMA_MASK |
4085
CFG_RST_H_MME_MASK |
4086
CFG_RST_H_SM_MASK |
4087
CFG_RST_H_TPC_7_MASK));
4088
4089
WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4090
4091
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4092
(CFG_RST_H_HBM_MASK |
4093
CFG_RST_H_TPC_7_MASK |
4094
CFG_RST_H_NIC_MASK |
4095
CFG_RST_H_SM_MASK |
4096
CFG_RST_H_DMA_MASK |
4097
CFG_RST_H_MME_MASK |
4098
CFG_RST_H_CPU_MASK |
4099
CFG_RST_H_MMU_MASK));
4100
4101
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4102
(CFG_RST_L_IF_MASK |
4103
CFG_RST_L_PSOC_MASK |
4104
CFG_RST_L_TPC_MASK));
4105
4106
msleep(cpu_timeout_ms);
4107
4108
/* Tell ASIC not to re-initialize PCIe */
4109
WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4110
4111
/* Restart BTL/BLR upon hard-reset */
4112
WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4113
4114
WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4115
1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4116
4117
dev_dbg(hdev->dev,
4118
"Issued HARD reset command, going to wait %dms\n",
4119
reset_timeout_ms);
4120
} else {
4121
dev_dbg(hdev->dev,
4122
"Firmware performs HARD reset, going to wait %dms\n",
4123
reset_timeout_ms);
4124
}
4125
4126
skip_reset:
4127
/*
4128
* After hard reset, we can't poll the BTM_FSM register because the PSOC
4129
* itself is in reset. Need to wait until the reset is deasserted
4130
*/
4131
msleep(reset_timeout_ms);
4132
4133
status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4134
if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4135
dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4136
return -ETIMEDOUT;
4137
}
4138
4139
if (gaudi) {
4140
gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4141
HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4142
HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4143
HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4144
HW_CAP_HBM_SCRAMBLER);
4145
4146
memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4147
4148
hdev->device_cpu_is_halted = false;
4149
}
4150
return 0;
4151
}
4152
4153
static int gaudi_suspend(struct hl_device *hdev)
{
	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}

static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}

static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4180
{
4181
struct cpu_dyn_regs *dyn_regs =
4182
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4183
u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4184
struct gaudi_device *gaudi = hdev->asic_specific;
4185
bool invalid_queue = false;
4186
int dma_id;
4187
4188
switch (hw_queue_id) {
4189
case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4190
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4191
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4192
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4193
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4194
break;
4195
4196
case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4197
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4198
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4199
q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4200
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4201
break;
4202
4203
case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4204
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4205
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4206
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4207
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4208
break;
4209
4210
case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4211
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4212
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4213
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4214
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4215
break;
4216
4217
case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4218
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4219
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4220
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4221
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4222
break;
4223
4224
case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4225
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4226
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4227
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4228
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4229
break;
4230
4231
case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4232
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4233
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4234
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4235
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4236
break;
4237
4238
case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4239
dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4240
dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4241
q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4242
db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4243
break;
4244
4245
case GAUDI_QUEUE_ID_CPU_PQ:
4246
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4247
db_reg_offset = mmCPU_IF_PF_PQ_PI;
4248
else
4249
invalid_queue = true;
4250
break;
4251
4252
case GAUDI_QUEUE_ID_MME_0_0:
4253
db_reg_offset = mmMME2_QM_PQ_PI_0;
4254
break;
4255
4256
case GAUDI_QUEUE_ID_MME_0_1:
4257
db_reg_offset = mmMME2_QM_PQ_PI_1;
4258
break;
4259
4260
case GAUDI_QUEUE_ID_MME_0_2:
4261
db_reg_offset = mmMME2_QM_PQ_PI_2;
4262
break;
4263
4264
case GAUDI_QUEUE_ID_MME_0_3:
4265
db_reg_offset = mmMME2_QM_PQ_PI_3;
4266
break;
4267
4268
case GAUDI_QUEUE_ID_MME_1_0:
4269
db_reg_offset = mmMME0_QM_PQ_PI_0;
4270
break;
4271
4272
case GAUDI_QUEUE_ID_MME_1_1:
4273
db_reg_offset = mmMME0_QM_PQ_PI_1;
4274
break;
4275
4276
case GAUDI_QUEUE_ID_MME_1_2:
4277
db_reg_offset = mmMME0_QM_PQ_PI_2;
4278
break;
4279
4280
case GAUDI_QUEUE_ID_MME_1_3:
4281
db_reg_offset = mmMME0_QM_PQ_PI_3;
4282
break;
4283
4284
case GAUDI_QUEUE_ID_TPC_0_0:
4285
db_reg_offset = mmTPC0_QM_PQ_PI_0;
4286
break;
4287
4288
case GAUDI_QUEUE_ID_TPC_0_1:
4289
db_reg_offset = mmTPC0_QM_PQ_PI_1;
4290
break;
4291
4292
case GAUDI_QUEUE_ID_TPC_0_2:
4293
db_reg_offset = mmTPC0_QM_PQ_PI_2;
4294
break;
4295
4296
case GAUDI_QUEUE_ID_TPC_0_3:
4297
db_reg_offset = mmTPC0_QM_PQ_PI_3;
4298
break;
4299
4300
case GAUDI_QUEUE_ID_TPC_1_0:
4301
db_reg_offset = mmTPC1_QM_PQ_PI_0;
4302
break;
4303
4304
case GAUDI_QUEUE_ID_TPC_1_1:
4305
db_reg_offset = mmTPC1_QM_PQ_PI_1;
4306
break;
4307
4308
case GAUDI_QUEUE_ID_TPC_1_2:
4309
db_reg_offset = mmTPC1_QM_PQ_PI_2;
4310
break;
4311
4312
case GAUDI_QUEUE_ID_TPC_1_3:
4313
db_reg_offset = mmTPC1_QM_PQ_PI_3;
4314
break;
4315
4316
case GAUDI_QUEUE_ID_TPC_2_0:
4317
db_reg_offset = mmTPC2_QM_PQ_PI_0;
4318
break;
4319
4320
case GAUDI_QUEUE_ID_TPC_2_1:
4321
db_reg_offset = mmTPC2_QM_PQ_PI_1;
4322
break;
4323
4324
case GAUDI_QUEUE_ID_TPC_2_2:
4325
db_reg_offset = mmTPC2_QM_PQ_PI_2;
4326
break;
4327
4328
case GAUDI_QUEUE_ID_TPC_2_3:
4329
db_reg_offset = mmTPC2_QM_PQ_PI_3;
4330
break;
4331
4332
case GAUDI_QUEUE_ID_TPC_3_0:
4333
db_reg_offset = mmTPC3_QM_PQ_PI_0;
4334
break;
4335
4336
case GAUDI_QUEUE_ID_TPC_3_1:
4337
db_reg_offset = mmTPC3_QM_PQ_PI_1;
4338
break;
4339
4340
case GAUDI_QUEUE_ID_TPC_3_2:
4341
db_reg_offset = mmTPC3_QM_PQ_PI_2;
4342
break;
4343
4344
case GAUDI_QUEUE_ID_TPC_3_3:
4345
db_reg_offset = mmTPC3_QM_PQ_PI_3;
4346
break;
4347
4348
case GAUDI_QUEUE_ID_TPC_4_0:
4349
db_reg_offset = mmTPC4_QM_PQ_PI_0;
4350
break;
4351
4352
case GAUDI_QUEUE_ID_TPC_4_1:
4353
db_reg_offset = mmTPC4_QM_PQ_PI_1;
4354
break;
4355
4356
case GAUDI_QUEUE_ID_TPC_4_2:
4357
db_reg_offset = mmTPC4_QM_PQ_PI_2;
4358
break;
4359
4360
case GAUDI_QUEUE_ID_TPC_4_3:
4361
db_reg_offset = mmTPC4_QM_PQ_PI_3;
4362
break;
4363
4364
case GAUDI_QUEUE_ID_TPC_5_0:
4365
db_reg_offset = mmTPC5_QM_PQ_PI_0;
4366
break;
4367
4368
case GAUDI_QUEUE_ID_TPC_5_1:
4369
db_reg_offset = mmTPC5_QM_PQ_PI_1;
4370
break;
4371
4372
case GAUDI_QUEUE_ID_TPC_5_2:
4373
db_reg_offset = mmTPC5_QM_PQ_PI_2;
4374
break;
4375
4376
case GAUDI_QUEUE_ID_TPC_5_3:
4377
db_reg_offset = mmTPC5_QM_PQ_PI_3;
4378
break;
4379
4380
case GAUDI_QUEUE_ID_TPC_6_0:
4381
db_reg_offset = mmTPC6_QM_PQ_PI_0;
4382
break;
4383
4384
case GAUDI_QUEUE_ID_TPC_6_1:
4385
db_reg_offset = mmTPC6_QM_PQ_PI_1;
4386
break;
4387
4388
case GAUDI_QUEUE_ID_TPC_6_2:
4389
db_reg_offset = mmTPC6_QM_PQ_PI_2;
4390
break;
4391
4392
case GAUDI_QUEUE_ID_TPC_6_3:
4393
db_reg_offset = mmTPC6_QM_PQ_PI_3;
4394
break;
4395
4396
case GAUDI_QUEUE_ID_TPC_7_0:
4397
db_reg_offset = mmTPC7_QM_PQ_PI_0;
4398
break;
4399
4400
case GAUDI_QUEUE_ID_TPC_7_1:
4401
db_reg_offset = mmTPC7_QM_PQ_PI_1;
4402
break;
4403
4404
case GAUDI_QUEUE_ID_TPC_7_2:
4405
db_reg_offset = mmTPC7_QM_PQ_PI_2;
4406
break;
4407
4408
case GAUDI_QUEUE_ID_TPC_7_3:
4409
db_reg_offset = mmTPC7_QM_PQ_PI_3;
4410
break;
4411
4412
case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4413
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4414
invalid_queue = true;
4415
4416
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4417
db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4418
break;
4419
4420
case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4421
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4422
invalid_queue = true;
4423
4424
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4425
db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4426
break;
4427
4428
case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4429
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4430
invalid_queue = true;
4431
4432
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433
db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4434
break;
4435
4436
case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4437
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4438
invalid_queue = true;
4439
4440
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441
db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4442
break;
4443
4444
case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4445
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4446
invalid_queue = true;
4447
4448
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449
db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4450
break;
4451
4452
case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4453
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4454
invalid_queue = true;
4455
4456
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457
db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4458
break;
4459
4460
case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4461
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4462
invalid_queue = true;
4463
4464
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465
db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4466
break;
4467
4468
case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4469
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4470
invalid_queue = true;
4471
4472
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473
db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4474
break;
4475
4476
case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4477
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4478
invalid_queue = true;
4479
4480
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481
db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4482
break;
4483
4484
case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4485
if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4486
invalid_queue = true;
4487
4488
q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489
db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4490
break;
4491
4492
default:
4493
invalid_queue = true;
4494
}
4495
4496
if (invalid_queue) {
4497
/* Should never get here */
4498
dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4499
hw_queue_id);
4500
return;
4501
}
4502
4503
db_value = pi;
4504
4505
/* ring the doorbell */
4506
WREG32(db_reg_offset, db_value);
4507
4508
if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4509
/* make sure device CPU will read latest data from host */
4510
mb();
4511
4512
irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4513
mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4514
le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4515
4516
WREG32(irq_handler_offset,
4517
gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4518
}
4519
}
4520
4521
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *pbd = (__le64 *) bd;

	/* The QMANs are on host memory, so a simple copy suffices */
	pqe[0] = pbd[0];
	pqe[1] = pbd[1];
}

static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

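/*
 * Internal queues keep their PQs in host kernel memory; return the kernel
 * address, bus address and number of PQ entries of the requested queue.
 */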
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4554
{
4555
struct asic_fixed_properties *prop = &hdev->asic_prop;
4556
u64 cur_addr = prop->dram_user_base_address;
4557
u32 chunk_size, busy;
4558
int rc, dma_id;
4559
4560
while (cur_addr < prop->dram_end_address) {
4561
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4562
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4563
4564
chunk_size =
4565
min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4566
4567
dev_dbg(hdev->dev,
4568
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4569
cur_addr, cur_addr + chunk_size);
4570
4571
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4572
lower_32_bits(val));
4573
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4574
upper_32_bits(val));
4575
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4576
lower_32_bits(cur_addr));
4577
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4578
upper_32_bits(cur_addr));
4579
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4580
chunk_size);
4581
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4582
((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4583
(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4584
4585
cur_addr += chunk_size;
4586
4587
if (cur_addr == prop->dram_end_address)
4588
break;
4589
}
4590
4591
for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4592
u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4593
4594
rc = hl_poll_timeout(
4595
hdev,
4596
mmDMA0_CORE_STS0 + dma_offset,
4597
busy,
4598
((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4599
1000,
4600
HBM_SCRUBBING_TIMEOUT_US);
4601
4602
if (rc) {
4603
dev_err(hdev->dev,
4604
"DMA Timeout during HBM scrubbing of DMA #%d\n",
4605
dma_id);
4606
return -EIO;
4607
}
4608
}
4609
}
4610
4611
return 0;
4612
}
4613
4614
static int gaudi_scrub_device_mem(struct hl_device *hdev)
4615
{
4616
struct asic_fixed_properties *prop = &hdev->asic_prop;
4617
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4618
u64 addr, size, val = hdev->memory_scrub_val;
4619
ktime_t timeout;
4620
int rc = 0;
4621
4622
if (!hdev->memory_scrub)
4623
return 0;
4624
4625
timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4626
while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4627
if (ktime_compare(ktime_get(), timeout) > 0) {
4628
dev_err(hdev->dev, "waiting for idle timeout\n");
4629
return -ETIMEDOUT;
4630
}
4631
usleep_range((1000 >> 2) + 1, 1000);
4632
}
4633
4634
/* Scrub SRAM */
4635
addr = prop->sram_user_base_address;
4636
size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4637
4638
dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4639
addr, addr + size, val);
4640
rc = gaudi_memset_device_memory(hdev, addr, size, val);
4641
if (rc) {
4642
dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4643
return rc;
4644
}
4645
4646
/* Scrub HBM using all DMA channels in parallel */
4647
rc = gaudi_scrub_device_dram(hdev, val);
4648
if (rc) {
4649
dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4650
return rc;
4651
}
4652
4653
return 0;
4654
}
4655
4656
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;

	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	q = &gaudi->internal_qmans[queue_id];
	*dma_handle = q->pq_dma_addr;
	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;

	return q->pq_kernel_addr;
}

static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
					timeout, result);
}

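/*
 * Queue sanity test: push a single MSG_PROT fence packet through the queue
 * and poll a scratch DWORD in host memory until the fence value shows up.
 */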
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4695
{
4696
struct packet_msg_prot *fence_pkt;
4697
dma_addr_t pkt_dma_addr;
4698
u32 fence_val, tmp, timeout_usec;
4699
dma_addr_t fence_dma_addr;
4700
u32 *fence_ptr;
4701
int rc;
4702
4703
if (hdev->pldm)
4704
timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4705
else
4706
timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4707
4708
fence_val = GAUDI_QMAN0_FENCE_VAL;
4709
4710
fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4711
if (!fence_ptr) {
4712
dev_err(hdev->dev,
4713
"Failed to allocate memory for H/W queue %d testing\n",
4714
hw_queue_id);
4715
return -ENOMEM;
4716
}
4717
4718
*fence_ptr = 0;
4719
4720
fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4721
&pkt_dma_addr);
4722
if (!fence_pkt) {
4723
dev_err(hdev->dev,
4724
"Failed to allocate packet for H/W queue %d testing\n",
4725
hw_queue_id);
4726
rc = -ENOMEM;
4727
goto free_fence_ptr;
4728
}
4729
4730
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4731
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4732
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4733
4734
fence_pkt->ctl = cpu_to_le32(tmp);
4735
fence_pkt->value = cpu_to_le32(fence_val);
4736
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4737
4738
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4739
sizeof(struct packet_msg_prot),
4740
pkt_dma_addr);
4741
if (rc) {
4742
dev_err(hdev->dev,
4743
"Failed to send fence packet to H/W queue %d\n",
4744
hw_queue_id);
4745
goto free_pkt;
4746
}
4747
4748
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4749
1000, timeout_usec, true);
4750
4751
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4752
4753
if (rc == -ETIMEDOUT) {
4754
dev_err(hdev->dev,
4755
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4756
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4757
rc = -EIO;
4758
}
4759
4760
free_pkt:
4761
hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4762
free_fence_ptr:
4763
hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4764
return rc;
4765
}
4766
4767
static int gaudi_test_cpu_queue(struct hl_device *hdev)
4768
{
4769
struct gaudi_device *gaudi = hdev->asic_specific;
4770
4771
/*
4772
* check capability here as send_cpu_message() won't update the result
4773
* value if no capability
4774
*/
4775
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4776
return 0;
4777
4778
return hl_fw_test_cpu_queue(hdev);
4779
}
4780
4781
static int gaudi_test_queues(struct hl_device *hdev)
4782
{
4783
int i, rc, ret_val = 0;
4784
4785
for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4786
if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4787
rc = gaudi_test_queue(hdev, i);
4788
if (rc)
4789
ret_val = -EINVAL;
4790
}
4791
}
4792
4793
rc = gaudi_test_cpu_queue(hdev);
4794
if (rc)
4795
ret_val = -EINVAL;
4796
4797
return ret_val;
4798
}
4799
4800
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4801
gfp_t mem_flags, dma_addr_t *dma_handle)
4802
{
4803
void *kernel_addr;
4804
4805
if (size > GAUDI_DMA_POOL_BLK_SIZE)
4806
return NULL;
4807
4808
kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4809
4810
/* Shift to the device's base physical address of host memory */
4811
if (kernel_addr)
4812
*dma_handle += HOST_PHYS_BASE;
4813
4814
return kernel_addr;
4815
}
4816
4817
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4818
dma_addr_t dma_addr)
4819
{
4820
/* Cancel the device's base physical address of host memory */
4821
dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4822
4823
dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4824
}
4825
4826
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4827
size_t size, dma_addr_t *dma_handle)
4828
{
4829
return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4830
}
4831
4832
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4833
size_t size, void *vaddr)
4834
{
4835
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4836
}
4837
4838
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4839
{
4840
struct scatterlist *sg, *sg_next_iter;
4841
u32 count, dma_desc_cnt;
4842
u64 len, len_next;
4843
dma_addr_t addr, addr_next;
4844
4845
dma_desc_cnt = 0;
4846
4847
for_each_sgtable_dma_sg(sgt, sg, count) {
4848
len = sg_dma_len(sg);
4849
addr = sg_dma_address(sg);
4850
4851
if (len == 0)
4852
break;
4853
4854
while ((count + 1) < sgt->nents) {
4855
sg_next_iter = sg_next(sg);
4856
len_next = sg_dma_len(sg_next_iter);
4857
addr_next = sg_dma_address(sg_next_iter);
4858
4859
if (len_next == 0)
4860
break;
4861
4862
if ((addr + len == addr_next) &&
4863
(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4864
len += len_next;
4865
count++;
4866
sg = sg_next_iter;
4867
} else {
4868
break;
4869
}
4870
}
4871
4872
dma_desc_cnt++;
4873
}
4874
4875
return dma_desc_cnt * sizeof(struct packet_lin_dma);
4876
}
4877
4878
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4879
struct hl_cs_parser *parser,
4880
struct packet_lin_dma *user_dma_pkt,
4881
u64 addr, enum dma_data_direction dir)
4882
{
4883
struct hl_userptr *userptr;
4884
int rc;
4885
4886
if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4887
parser->job_userptr_list, &userptr))
4888
goto already_pinned;
4889
4890
userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4891
if (!userptr)
4892
return -ENOMEM;
4893
4894
rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4895
userptr);
4896
if (rc)
4897
goto free_userptr;
4898
4899
list_add_tail(&userptr->job_node, parser->job_userptr_list);
4900
4901
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4902
if (rc) {
4903
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4904
goto unpin_memory;
4905
}
4906
4907
userptr->dma_mapped = true;
4908
userptr->dir = dir;
4909
4910
already_pinned:
4911
parser->patched_cb_size +=
4912
gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4913
4914
return 0;
4915
4916
unpin_memory:
4917
list_del(&userptr->job_node);
4918
hl_unpin_host_memory(hdev, userptr);
4919
free_userptr:
4920
kfree(userptr);
4921
return rc;
4922
}
4923
4924
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4925
struct hl_cs_parser *parser,
4926
struct packet_lin_dma *user_dma_pkt,
4927
bool src_in_host)
4928
{
4929
enum dma_data_direction dir;
4930
bool skip_host_mem_pin = false, user_memset;
4931
u64 addr;
4932
int rc = 0;
4933
4934
user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4935
GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4936
GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4937
4938
if (src_in_host) {
4939
if (user_memset)
4940
skip_host_mem_pin = true;
4941
4942
dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4943
dir = DMA_TO_DEVICE;
4944
addr = le64_to_cpu(user_dma_pkt->src_addr);
4945
} else {
4946
dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4947
dir = DMA_FROM_DEVICE;
4948
addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4949
GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4950
GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4951
}
4952
4953
if (skip_host_mem_pin)
4954
parser->patched_cb_size += sizeof(*user_dma_pkt);
4955
else
4956
rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4957
addr, dir);
4958
4959
return rc;
4960
}
4961
4962
static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4963
struct hl_cs_parser *parser,
4964
struct packet_lin_dma *user_dma_pkt)
4965
{
4966
bool src_in_host = false;
4967
u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968
GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969
GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970
4971
dev_dbg(hdev->dev, "DMA packet details:\n");
4972
dev_dbg(hdev->dev, "source == 0x%llx\n",
4973
le64_to_cpu(user_dma_pkt->src_addr));
4974
dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4975
dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4976
4977
/*
4978
* Special handling for DMA with size 0. Bypass all validations
4979
* because no transactions will be done except for WR_COMP, which
4980
* is not a security issue
4981
*/
4982
if (!le32_to_cpu(user_dma_pkt->tsize)) {
4983
parser->patched_cb_size += sizeof(*user_dma_pkt);
4984
return 0;
4985
}
4986
4987
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4988
src_in_host = true;
4989
4990
return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4991
src_in_host);
4992
}
4993
4994
static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
					struct hl_cs_parser *parser,
					struct packet_load_and_exe *user_pkt)
{
	u32 cfg;

	cfg = le32_to_cpu(user_pkt->cfg);

	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
		dev_err(hdev->dev,
			"User not allowed to use Load and Execute\n");
		return -EPERM;
	}

	parser->patched_cb_size += sizeof(struct packet_load_and_exe);

	return 0;
}

static int gaudi_validate_cb(struct hl_device *hdev,
5014
struct hl_cs_parser *parser, bool is_mmu)
5015
{
5016
u32 cb_parsed_length = 0;
5017
int rc = 0;
5018
5019
parser->patched_cb_size = 0;
5020
5021
/* cb_user_size is more than 0 so loop will always be executed */
5022
while (cb_parsed_length < parser->user_cb_size) {
5023
enum packet_id pkt_id;
5024
u16 pkt_size;
5025
struct gaudi_packet *user_pkt;
5026
5027
user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5028
5029
pkt_id = (enum packet_id) (
5030
(le64_to_cpu(user_pkt->header) &
5031
PACKET_HEADER_PACKET_ID_MASK) >>
5032
PACKET_HEADER_PACKET_ID_SHIFT);
5033
5034
if (!validate_packet_id(pkt_id)) {
5035
dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5036
rc = -EINVAL;
5037
break;
5038
}
5039
5040
pkt_size = gaudi_packet_sizes[pkt_id];
5041
cb_parsed_length += pkt_size;
5042
if (cb_parsed_length > parser->user_cb_size) {
5043
dev_err(hdev->dev,
5044
"packet 0x%x is out of CB boundary\n", pkt_id);
5045
rc = -EINVAL;
5046
break;
5047
}
5048
5049
switch (pkt_id) {
5050
case PACKET_MSG_PROT:
5051
dev_err(hdev->dev,
5052
"User not allowed to use MSG_PROT\n");
5053
rc = -EPERM;
5054
break;
5055
5056
case PACKET_CP_DMA:
5057
dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5058
rc = -EPERM;
5059
break;
5060
5061
case PACKET_STOP:
5062
dev_err(hdev->dev, "User not allowed to use STOP\n");
5063
rc = -EPERM;
5064
break;
5065
5066
case PACKET_WREG_BULK:
5067
dev_err(hdev->dev,
5068
"User not allowed to use WREG_BULK\n");
5069
rc = -EPERM;
5070
break;
5071
5072
case PACKET_LOAD_AND_EXE:
5073
rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5074
(struct packet_load_and_exe *) user_pkt);
5075
break;
5076
5077
case PACKET_LIN_DMA:
5078
parser->contains_dma_pkt = true;
5079
if (is_mmu)
5080
parser->patched_cb_size += pkt_size;
5081
else
5082
rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5083
(struct packet_lin_dma *) user_pkt);
5084
break;
5085
5086
case PACKET_WREG_32:
5087
case PACKET_MSG_LONG:
5088
case PACKET_MSG_SHORT:
5089
case PACKET_REPEAT:
5090
case PACKET_FENCE:
5091
case PACKET_NOP:
5092
case PACKET_ARB_POINT:
5093
parser->patched_cb_size += pkt_size;
5094
break;
5095
5096
default:
5097
dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5098
pkt_id);
5099
rc = -EINVAL;
5100
break;
5101
}
5102
5103
if (rc)
5104
break;
5105
}
5106
5107
        /*
         * The patched CB should have extra space at its end for:
         * 1. Optional NOP padding for cacheline alignment
         * 2. A MSG_PROT packet that will act as a completion packet
         * 3. A MSG_PROT packet that will generate the MSI interrupt
         */
        if (parser->completion)
                parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
                                parser->patched_cb_size);
5116
5117
return rc;
5118
}
5119
5120
static int gaudi_patch_dma_packet(struct hl_device *hdev,
5121
struct hl_cs_parser *parser,
5122
struct packet_lin_dma *user_dma_pkt,
5123
struct packet_lin_dma *new_dma_pkt,
5124
u32 *new_dma_pkt_size)
5125
{
5126
struct hl_userptr *userptr;
5127
struct scatterlist *sg, *sg_next_iter;
5128
u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5129
u64 len, len_next;
5130
dma_addr_t dma_addr, dma_addr_next;
5131
u64 device_memory_addr, addr;
5132
enum dma_data_direction dir;
5133
struct sg_table *sgt;
5134
bool src_in_host = false;
5135
bool skip_host_mem_pin = false;
5136
bool user_memset;
5137
5138
ctl = le32_to_cpu(user_dma_pkt->ctl);
5139
5140
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5141
src_in_host = true;
5142
5143
user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5144
GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5145
5146
if (src_in_host) {
5147
addr = le64_to_cpu(user_dma_pkt->src_addr);
5148
device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5149
dir = DMA_TO_DEVICE;
5150
if (user_memset)
5151
skip_host_mem_pin = true;
5152
} else {
5153
addr = le64_to_cpu(user_dma_pkt->dst_addr);
5154
device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5155
dir = DMA_FROM_DEVICE;
5156
}
5157
5158
if ((!skip_host_mem_pin) &&
5159
(!hl_userptr_is_pinned(hdev, addr,
5160
le32_to_cpu(user_dma_pkt->tsize),
5161
parser->job_userptr_list, &userptr))) {
5162
dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5163
addr, user_dma_pkt->tsize);
5164
return -EFAULT;
5165
}
5166
5167
if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5168
memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5169
*new_dma_pkt_size = sizeof(*user_dma_pkt);
5170
return 0;
5171
}
5172
5173
user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5174
5175
sgt = userptr->sgt;
5176
dma_desc_cnt = 0;
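/*
 * Expand the single user LIN_DMA packet into one packet per DMA
 * scatter-gather chunk. Physically contiguous entries are merged as long as
 * the combined length fits in DMA_MAX_TRANSFER_SIZE.
 */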
5177
5178
for_each_sgtable_dma_sg(sgt, sg, count) {
5179
len = sg_dma_len(sg);
5180
dma_addr = sg_dma_address(sg);
5181
5182
if (len == 0)
5183
break;
5184
5185
while ((count + 1) < sgt->nents) {
5186
sg_next_iter = sg_next(sg);
5187
len_next = sg_dma_len(sg_next_iter);
5188
dma_addr_next = sg_dma_address(sg_next_iter);
5189
5190
if (len_next == 0)
5191
break;
5192
5193
if ((dma_addr + len == dma_addr_next) &&
5194
(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5195
len += len_next;
5196
count++;
5197
sg = sg_next_iter;
5198
} else {
5199
break;
5200
}
5201
}
5202
5203
ctl = le32_to_cpu(user_dma_pkt->ctl);
5204
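/*
 * Keep the engine barrier only on the first generated packet and drop
 * WR_COMP from all of them; the user's WR_COMP setting is restored on the
 * last packet after the loop.
 */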
if (likely(dma_desc_cnt))
5205
ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5206
ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5207
new_dma_pkt->ctl = cpu_to_le32(ctl);
5208
new_dma_pkt->tsize = cpu_to_le32(len);
5209
5210
if (dir == DMA_TO_DEVICE) {
5211
new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5212
new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5213
} else {
5214
new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5215
new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5216
}
5217
5218
if (!user_memset)
5219
device_memory_addr += len;
5220
dma_desc_cnt++;
5221
new_dma_pkt++;
5222
}
5223
5224
if (!dma_desc_cnt) {
5225
dev_err(hdev->dev,
5226
"Error of 0 SG entries when patching DMA packet\n");
5227
return -EFAULT;
5228
}
5229
5230
/* Fix the last dma packet - wrcomp must be as user set it */
5231
new_dma_pkt--;
5232
new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5233
5234
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5235
5236
return 0;
5237
}
5238
5239
static int gaudi_patch_cb(struct hl_device *hdev,
5240
struct hl_cs_parser *parser)
5241
{
5242
u32 cb_parsed_length = 0;
5243
u32 cb_patched_cur_length = 0;
5244
int rc = 0;
5245
5246
/* cb_user_size is more than 0 so loop will always be executed */
5247
while (cb_parsed_length < parser->user_cb_size) {
5248
enum packet_id pkt_id;
5249
u16 pkt_size;
5250
u32 new_pkt_size = 0;
5251
struct gaudi_packet *user_pkt, *kernel_pkt;
5252
5253
user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5254
kernel_pkt = parser->patched_cb->kernel_address +
5255
cb_patched_cur_length;
5256
5257
pkt_id = (enum packet_id) (
5258
(le64_to_cpu(user_pkt->header) &
5259
PACKET_HEADER_PACKET_ID_MASK) >>
5260
PACKET_HEADER_PACKET_ID_SHIFT);
5261
5262
if (!validate_packet_id(pkt_id)) {
5263
dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5264
rc = -EINVAL;
5265
break;
5266
}
5267
5268
pkt_size = gaudi_packet_sizes[pkt_id];
5269
cb_parsed_length += pkt_size;
5270
if (cb_parsed_length > parser->user_cb_size) {
5271
dev_err(hdev->dev,
5272
"packet 0x%x is out of CB boundary\n", pkt_id);
5273
rc = -EINVAL;
5274
break;
5275
}
5276
5277
switch (pkt_id) {
5278
case PACKET_LIN_DMA:
5279
rc = gaudi_patch_dma_packet(hdev, parser,
5280
(struct packet_lin_dma *) user_pkt,
5281
(struct packet_lin_dma *) kernel_pkt,
5282
&new_pkt_size);
5283
cb_patched_cur_length += new_pkt_size;
5284
break;
5285
5286
case PACKET_MSG_PROT:
5287
dev_err(hdev->dev,
5288
"User not allowed to use MSG_PROT\n");
5289
rc = -EPERM;
5290
break;
5291
5292
case PACKET_CP_DMA:
5293
dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5294
rc = -EPERM;
5295
break;
5296
5297
case PACKET_STOP:
5298
dev_err(hdev->dev, "User not allowed to use STOP\n");
5299
rc = -EPERM;
5300
break;
5301
5302
case PACKET_WREG_32:
5303
case PACKET_WREG_BULK:
5304
case PACKET_MSG_LONG:
5305
case PACKET_MSG_SHORT:
5306
case PACKET_REPEAT:
5307
case PACKET_FENCE:
5308
case PACKET_NOP:
5309
case PACKET_ARB_POINT:
5310
case PACKET_LOAD_AND_EXE:
5311
memcpy(kernel_pkt, user_pkt, pkt_size);
5312
cb_patched_cur_length += pkt_size;
5313
break;
5314
5315
default:
5316
dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5317
pkt_id);
5318
rc = -EINVAL;
5319
break;
5320
}
5321
5322
if (rc)
5323
break;
5324
}
5325
5326
return rc;
5327
}
5328
5329
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5330
struct hl_cs_parser *parser)
5331
{
5332
u64 handle;
5333
u32 patched_cb_size;
5334
struct hl_cb *user_cb;
5335
int rc;
5336
5337
        /*
         * The patched CB should have extra space at its end for:
         * 1. Optional NOP padding for cacheline alignment
         * 2. A MSG_PROT packet that will act as a completion packet
         * 3. A MSG_PROT packet that will generate the MSI interrupt
         */
        if (parser->completion)
                parser->patched_cb_size = parser->user_cb_size +
                                gaudi_get_patched_cb_extra_size(parser->user_cb_size);
        else
                parser->patched_cb_size = parser->user_cb_size;
5348
5349
rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5350
parser->patched_cb_size, false, false,
5351
&handle);
5352
5353
if (rc) {
5354
dev_err(hdev->dev,
5355
"Failed to allocate patched CB for DMA CS %d\n",
5356
rc);
5357
return rc;
5358
}
5359
5360
parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5361
/* hl_cb_get should never fail */
5362
if (!parser->patched_cb) {
5363
dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5364
rc = -EFAULT;
5365
goto out;
5366
}
5367
5368
/*
5369
* We are protected from overflow because the check
5370
* "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5371
* in the common code. That check is done only if is_kernel_allocated_cb is true.
5372
*
5373
* There is no option to reach here without going through that check because:
5374
* 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5375
* an external queue.
5376
* 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5377
*/
5378
memcpy(parser->patched_cb->kernel_address,
5379
parser->user_cb->kernel_address,
5380
parser->user_cb_size);
5381
5382
patched_cb_size = parser->patched_cb_size;
5383
5384
/* Validate patched CB instead of user CB */
5385
user_cb = parser->user_cb;
5386
parser->user_cb = parser->patched_cb;
5387
rc = gaudi_validate_cb(hdev, parser, true);
5388
parser->user_cb = user_cb;
5389
5390
if (rc) {
5391
hl_cb_put(parser->patched_cb);
5392
goto out;
5393
}
5394
5395
if (patched_cb_size != parser->patched_cb_size) {
5396
dev_err(hdev->dev, "user CB size mismatch\n");
5397
hl_cb_put(parser->patched_cb);
5398
rc = -EINVAL;
5399
goto out;
5400
}
5401
5402
out:
5403
/*
5404
* Always call cb destroy here because we still have 1 reference
5405
* to it by calling cb_get earlier. After the job will be completed,
5406
* cb_put will release it, but here we want to remove it from the
5407
* idr
5408
*/
5409
hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5410
5411
return rc;
5412
}
5413
5414
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5415
struct hl_cs_parser *parser)
5416
{
5417
u64 handle;
5418
int rc;
5419
5420
rc = gaudi_validate_cb(hdev, parser, false);
5421
5422
if (rc)
5423
goto free_userptr;
5424
5425
rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5426
parser->patched_cb_size, false, false,
5427
&handle);
5428
if (rc) {
5429
dev_err(hdev->dev,
5430
"Failed to allocate patched CB for DMA CS %d\n", rc);
5431
goto free_userptr;
5432
}
5433
5434
parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5435
/* hl_cb_get should never fail here */
5436
if (!parser->patched_cb) {
5437
dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5438
rc = -EFAULT;
5439
goto out;
5440
}
5441
5442
rc = gaudi_patch_cb(hdev, parser);
5443
5444
if (rc)
5445
hl_cb_put(parser->patched_cb);
5446
5447
out:
5448
/*
5449
* Always call cb destroy here because we still have 1 reference
5450
* to it by calling cb_get earlier. After the job will be completed,
5451
* cb_put will release it, but here we want to remove it from the
5452
* idr
5453
*/
5454
hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5455
5456
free_userptr:
5457
if (rc)
5458
hl_userptr_delete_list(hdev, parser->job_userptr_list);
5459
return rc;
5460
}
5461
5462
static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5463
struct hl_cs_parser *parser)
5464
{
5465
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5466
struct gaudi_device *gaudi = hdev->asic_specific;
5467
u32 nic_queue_offset, nic_mask_q_id;
5468
5469
if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5470
(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5471
nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5472
nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5473
5474
if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5475
dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5476
return -EINVAL;
5477
}
5478
}
5479
5480
/* For internal queue jobs just check if CB address is valid */
5481
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5482
parser->user_cb_size,
5483
asic_prop->sram_user_base_address,
5484
asic_prop->sram_end_address))
5485
return 0;
5486
5487
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488
parser->user_cb_size,
5489
asic_prop->dram_user_base_address,
5490
asic_prop->dram_end_address))
5491
return 0;
5492
5493
/* PMMU and HPMMU addresses are equal, check only one of them */
5494
if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5495
parser->user_cb_size,
5496
asic_prop->pmmu.start_addr,
5497
asic_prop->pmmu.end_addr))
5498
return 0;
5499
5500
dev_err(hdev->dev,
5501
"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5502
parser->user_cb, parser->user_cb_size);
5503
5504
return -EFAULT;
5505
}
5506
5507
static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (parser->queue_type == QUEUE_TYPE_INT)
                return gaudi_parse_cb_no_ext_queue(hdev, parser);

        if (gaudi->hw_cap_initialized & HW_CAP_MMU)
                return gaudi_parse_cb_mmu(hdev, parser);
        else
                return gaudi_parse_cb_no_mmu(hdev, parser);
}
5519
5520
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5521
u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5522
u32 msi_vec, bool eb)
5523
{
5524
struct packet_msg_prot *cq_pkt;
5525
struct packet_nop *cq_padding;
5526
u64 msi_addr;
5527
u32 tmp;
5528
5529
cq_padding = kernel_address + original_len;
5530
cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5531
5532
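/*
 * Fill the gap between the end of the original CB content and the two
 * trailing MSG_PROT packets with NOP packets (cacheline alignment padding).
 */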
while ((void *)cq_padding < (void *)cq_pkt) {
5533
cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5534
cq_padding++;
5535
}
5536
5537
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5538
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5539
5540
if (eb)
5541
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5542
5543
cq_pkt->ctl = cpu_to_le32(tmp);
5544
cq_pkt->value = cpu_to_le32(cq_val);
5545
cq_pkt->addr = cpu_to_le64(cq_addr);
5546
5547
cq_pkt++;
5548
5549
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5550
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5551
cq_pkt->ctl = cpu_to_le32(tmp);
5552
cq_pkt->value = cpu_to_le32(1);
5553
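/*
 * The second MSG_PROT raises the interrupt: on a real PCI device it writes
 * to the PCIe core MSI request register, otherwise (no PCI device, e.g.
 * simulation platforms) to the per-vector MSI interrupt register.
 */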
msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5554
cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5555
}
5556
5557
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
        WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5561
5562
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5563
u32 size, u64 val)
5564
{
5565
struct packet_lin_dma *lin_dma_pkt;
5566
struct hl_cs_job *job;
5567
u32 cb_size, ctl, err_cause;
5568
struct hl_cb *cb;
5569
int rc;
5570
5571
cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5572
if (!cb)
5573
return -EFAULT;
5574
5575
lin_dma_pkt = cb->kernel_address;
5576
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5577
cb_size = sizeof(*lin_dma_pkt);
5578
5579
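/*
 * Build a single LIN_DMA packet in memset mode: the 64-bit fill pattern is
 * carried in src_addr and 'size' bytes are written at 'addr'.
 */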
ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5580
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5581
ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5582
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5583
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5584
5585
lin_dma_pkt->ctl = cpu_to_le32(ctl);
5586
lin_dma_pkt->src_addr = cpu_to_le64(val);
5587
lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5588
lin_dma_pkt->tsize = cpu_to_le32(size);
5589
5590
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5591
if (!job) {
5592
dev_err(hdev->dev, "Failed to allocate a new job\n");
5593
rc = -ENOMEM;
5594
goto release_cb;
5595
}
5596
5597
/* Verify DMA is OK */
5598
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5599
if (err_cause && !hdev->init_done) {
5600
dev_dbg(hdev->dev,
5601
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5602
err_cause);
5603
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5604
}
5605
5606
job->id = 0;
5607
job->user_cb = cb;
5608
atomic_inc(&job->user_cb->cs_cnt);
5609
job->user_cb_size = cb_size;
5610
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5611
job->patched_cb = job->user_cb;
5612
job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5613
5614
hl_debugfs_add_job(hdev, job);
5615
5616
rc = gaudi_send_job_on_qman0(hdev, job);
5617
hl_debugfs_remove_job(hdev, job);
5618
kfree(job);
5619
atomic_dec(&cb->cs_cnt);
5620
5621
/* Verify DMA is OK */
5622
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5623
if (err_cause) {
5624
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5625
rc = -EIO;
5626
if (!hdev->init_done) {
5627
dev_dbg(hdev->dev,
5628
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5629
err_cause);
5630
WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5631
}
5632
}
5633
5634
release_cb:
5635
hl_cb_put(cb);
5636
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5637
5638
return rc;
5639
}
5640
5641
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5642
u32 num_regs, u32 val)
5643
{
5644
struct packet_msg_long *pkt;
5645
struct hl_cs_job *job;
5646
u32 cb_size, ctl;
5647
struct hl_cb *cb;
5648
int i, rc;
5649
5650
cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5651
5652
if (cb_size > SZ_2M) {
5653
dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5654
return -ENOMEM;
5655
}
5656
5657
cb = hl_cb_kernel_create(hdev, cb_size, false);
5658
if (!cb)
5659
return -EFAULT;
5660
5661
pkt = cb->kernel_address;
5662
5663
ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5664
ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5665
ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5666
ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5667
ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5668
5669
for (i = 0; i < num_regs ; i++, pkt++) {
5670
pkt->ctl = cpu_to_le32(ctl);
5671
pkt->value = cpu_to_le32(val);
5672
pkt->addr = cpu_to_le64(reg_base + (i * 4));
5673
}
5674
5675
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5676
if (!job) {
5677
dev_err(hdev->dev, "Failed to allocate a new job\n");
5678
rc = -ENOMEM;
5679
goto release_cb;
5680
}
5681
5682
job->id = 0;
5683
job->user_cb = cb;
5684
atomic_inc(&job->user_cb->cs_cnt);
5685
job->user_cb_size = cb_size;
5686
job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5687
job->patched_cb = job->user_cb;
5688
job->job_cb_size = cb_size;
5689
5690
hl_debugfs_add_job(hdev, job);
5691
5692
rc = gaudi_send_job_on_qman0(hdev, job);
5693
hl_debugfs_remove_job(hdev, job);
5694
kfree(job);
5695
atomic_dec(&cb->cs_cnt);
5696
5697
release_cb:
5698
hl_cb_put(cb);
5699
hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5700
5701
return rc;
5702
}
5703
5704
static int gaudi_restore_sm_registers(struct hl_device *hdev)
5705
{
5706
u64 base_addr;
5707
u32 num_regs;
5708
int rc;
5709
5710
base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5711
num_regs = NUM_OF_SOB_IN_BLOCK;
5712
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5713
if (rc) {
5714
dev_err(hdev->dev, "failed resetting SM registers");
5715
return -ENOMEM;
5716
}
5717
5718
base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719
num_regs = NUM_OF_SOB_IN_BLOCK;
5720
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721
if (rc) {
5722
dev_err(hdev->dev, "failed resetting SM registers");
5723
return -ENOMEM;
5724
}
5725
5726
base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727
num_regs = NUM_OF_SOB_IN_BLOCK;
5728
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729
if (rc) {
5730
dev_err(hdev->dev, "failed resetting SM registers");
5731
return -ENOMEM;
5732
}
5733
5734
base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5735
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5736
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737
if (rc) {
5738
dev_err(hdev->dev, "failed resetting SM registers");
5739
return -ENOMEM;
5740
}
5741
5742
base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5743
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745
if (rc) {
5746
dev_err(hdev->dev, "failed resetting SM registers");
5747
return -ENOMEM;
5748
}
5749
5750
base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5751
num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753
if (rc) {
5754
dev_err(hdev->dev, "failed resetting SM registers");
5755
return -ENOMEM;
5756
}
5757
5758
base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5759
(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5760
num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5761
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762
if (rc) {
5763
dev_err(hdev->dev, "failed resetting SM registers");
5764
return -ENOMEM;
5765
}
5766
5767
base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5768
(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5769
num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5770
rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771
if (rc) {
5772
dev_err(hdev->dev, "failed resetting SM registers");
5773
return -ENOMEM;
5774
}
5775
5776
return 0;
5777
}
5778
5779
static void gaudi_restore_dma_registers(struct hl_device *hdev)
5780
{
5781
u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5782
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5783
int i;
5784
5785
for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5786
u64 sob_addr = CFG_BASE +
5787
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5788
(i * sob_delta);
5789
u32 dma_offset = i * DMA_CORE_OFFSET;
5790
5791
WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5792
lower_32_bits(sob_addr));
5793
WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5794
upper_32_bits(sob_addr));
5795
WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5796
5797
/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5798
* modified by the user for SRAM reduction
5799
*/
5800
if (i > 1)
5801
WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5802
0x00000001);
5803
}
5804
}
5805
5806
static void gaudi_restore_qm_registers(struct hl_device *hdev)
5807
{
5808
u32 qman_offset;
5809
int i;
5810
5811
for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5812
qman_offset = i * DMA_QMAN_OFFSET;
5813
WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5814
}
5815
5816
for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5817
qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5818
WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5819
}
5820
5821
for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5822
qman_offset = i * TPC_QMAN_OFFSET;
5823
WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5824
}
5825
5826
for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5827
qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5828
(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5829
WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5830
}
5831
}
5832
5833
static int gaudi_restore_user_registers(struct hl_device *hdev)
5834
{
5835
int rc;
5836
5837
rc = gaudi_restore_sm_registers(hdev);
5838
if (rc)
5839
return rc;
5840
5841
gaudi_restore_dma_registers(hdev);
5842
gaudi_restore_qm_registers(hdev);
5843
5844
return 0;
5845
}
5846
5847
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5848
{
5849
return 0;
5850
}
5851
5852
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5853
{
5854
u32 size = hdev->asic_prop.mmu_pgt_size +
5855
hdev->asic_prop.mmu_cache_mng_size;
5856
struct gaudi_device *gaudi = hdev->asic_specific;
5857
u64 addr = hdev->asic_prop.mmu_pgt_addr;
5858
5859
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5860
return 0;
5861
5862
return gaudi_memset_device_memory(hdev, addr, size, 0);
5863
}
5864
5865
static void gaudi_restore_phase_topology(struct hl_device *hdev)
5866
{
5867
5868
}
5869
5870
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5871
u32 size_to_dma, dma_addr_t dma_addr)
5872
{
5873
u32 err_cause, val;
5874
u64 dma_offset;
5875
int rc;
5876
5877
dma_offset = dma_id * DMA_CORE_OFFSET;
5878
5879
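/*
 * Program the DMA core registers directly (bypassing the QMAN): source,
 * destination and size, then commit a linear transfer and poll the busy bit
 * below until it clears.
 */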
WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5880
WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5881
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5882
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5883
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5884
WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5885
(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5886
5887
rc = hl_poll_timeout(
5888
hdev,
5889
mmDMA0_CORE_STS0 + dma_offset,
5890
val,
5891
((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5892
0,
5893
1000000);
5894
5895
if (rc) {
5896
dev_err(hdev->dev,
5897
"DMA %d timed-out during reading of 0x%llx\n",
5898
dma_id, addr);
5899
return -EIO;
5900
}
5901
5902
/* Verify DMA is OK */
5903
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5904
if (err_cause) {
5905
dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906
dev_dbg(hdev->dev,
5907
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5908
err_cause);
5909
WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5910
5911
return -EIO;
5912
}
5913
5914
return 0;
5915
}
5916
5917
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5918
void *blob_addr)
5919
{
5920
u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5921
u32 qm_glbl_sts0, qm_cgm_sts;
5922
u64 dma_offset, qm_offset;
5923
dma_addr_t dma_addr;
5924
void *kernel_addr;
5925
bool is_eng_idle;
5926
int rc = 0, dma_id;
5927
5928
kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5929
5930
if (!kernel_addr)
5931
return -ENOMEM;
5932
5933
hdev->asic_funcs->hw_queues_lock(hdev);
5934
5935
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5936
dma_offset = dma_id * DMA_CORE_OFFSET;
5937
qm_offset = dma_id * DMA_QMAN_OFFSET;
5938
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5939
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5940
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5941
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5942
IS_DMA_IDLE(dma_core_sts0);
5943
5944
if (!is_eng_idle) {
5945
dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5946
dma_offset = dma_id * DMA_CORE_OFFSET;
5947
qm_offset = dma_id * DMA_QMAN_OFFSET;
5948
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5949
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5950
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5951
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5952
IS_DMA_IDLE(dma_core_sts0);
5953
5954
if (!is_eng_idle) {
5955
dev_err_ratelimited(hdev->dev,
5956
"Can't read via DMA because it is BUSY\n");
5957
rc = -EAGAIN;
5958
goto out;
5959
}
5960
}
5961
5962
cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5963
WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5964
0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5965
5966
/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5967
* using the compute ctx ASID, if exists. If not, use the kernel ctx
5968
* ASID
5969
*/
5970
WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5971
5972
/* Verify DMA is OK */
5973
err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5974
if (err_cause) {
5975
dev_dbg(hdev->dev,
5976
"Clearing DMA0 engine from errors (cause 0x%x)\n",
5977
err_cause);
5978
WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5979
}
5980
5981
pos = 0;
5982
size_left = size;
5983
size_to_dma = SZ_2M;
5984
5985
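/*
 * Copy the requested region in SZ_2M chunks through the coherent bounce
 * buffer; the last iteration shrinks the transfer to the remaining size.
 */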
while (size_left > 0) {
5986
5987
if (size_left < SZ_2M)
5988
size_to_dma = size_left;
5989
5990
rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5991
dma_addr);
5992
if (rc)
5993
break;
5994
5995
memcpy(blob_addr + pos, kernel_addr, size_to_dma);
5996
5997
if (size_left <= SZ_2M)
5998
break;
5999
6000
pos += SZ_2M;
6001
addr += SZ_2M;
6002
size_left -= SZ_2M;
6003
}
6004
6005
/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6006
* using the compute ctx ASID, if exists. If not, use the kernel ctx
6007
* ASID
6008
*/
6009
WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6010
~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6011
6012
WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6013
6014
out:
6015
hdev->asic_funcs->hw_queues_unlock(hdev);
6016
6017
hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6018
6019
return rc;
6020
}
6021
6022
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (hdev->reset_info.hard_reset_pending)
                return U64_MAX;

        return readq(hdev->pcie_bar[HBM_BAR_ID] +
                        (addr - gaudi->hbm_bar_cur_addr));
}

static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
        struct gaudi_device *gaudi = hdev->asic_specific;

        if (hdev->reset_info.hard_reset_pending)
                return;

        writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
                        (addr - gaudi->hbm_bar_cur_addr));
}
6043
6044
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
        /* mask to zero the MMBP and ASID bits */
        WREG32_AND(reg, ~0x7FF);
        WREG32_OR(reg, asid);
}
6050
6051
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6052
{
6053
struct gaudi_device *gaudi = hdev->asic_specific;
6054
6055
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6056
return;
6057
6058
if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6059
dev_crit(hdev->dev, "asid %u is too big\n", asid);
6060
return;
6061
}
6062
6063
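/*
 * Program the ASID (and clear the MMU-bypass bit) in the AXI user and
 * non-secure properties registers of every engine, so that all engine
 * traffic is translated by the MMU in the context of this ASID.
 */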
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6064
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6065
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6066
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6067
gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6068
6069
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073
gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074
6075
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079
gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080
6081
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085
gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086
6087
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091
gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092
6093
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097
gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098
6099
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103
gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104
6105
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109
gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110
6111
gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6112
gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6113
gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6114
gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6115
gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6116
gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6117
gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6118
gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6119
6120
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6121
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6122
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6123
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6124
gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6125
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6126
gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6127
6128
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6129
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6130
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6131
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6132
gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6133
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6134
gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6135
6136
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6137
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6138
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6139
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6140
gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6141
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6142
gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6143
6144
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6145
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6146
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6147
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6148
gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6149
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6150
gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6151
6152
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156
gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6158
gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6159
6160
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6161
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6162
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6163
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6164
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6165
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6166
gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6167
6168
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172
gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6174
gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6175
6176
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180
gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6182
gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6183
6184
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6185
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6186
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6187
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6188
gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6189
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6190
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6191
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6192
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6193
gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6194
6195
gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6196
gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6197
gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6198
gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6199
gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6200
gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6201
gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6202
gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6203
gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6204
gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6205
gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6206
gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6207
6208
if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6209
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6210
asid);
6211
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6212
asid);
6213
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6214
asid);
6215
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6216
asid);
6217
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6218
asid);
6219
}
6220
6221
if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6222
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6223
asid);
6224
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6225
asid);
6226
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6227
asid);
6228
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6229
asid);
6230
gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6231
asid);
6232
}
6233
6234
if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6235
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6236
asid);
6237
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6238
asid);
6239
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6240
asid);
6241
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6242
asid);
6243
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6244
asid);
6245
}
6246
6247
if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6248
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6249
asid);
6250
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6251
asid);
6252
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6253
asid);
6254
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6255
asid);
6256
gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6257
asid);
6258
}
6259
6260
if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6261
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6262
asid);
6263
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6264
asid);
6265
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6266
asid);
6267
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6268
asid);
6269
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6270
asid);
6271
}
6272
6273
if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6274
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6275
asid);
6276
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6277
asid);
6278
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6279
asid);
6280
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6281
asid);
6282
gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6283
asid);
6284
}
6285
6286
if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6287
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6288
asid);
6289
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6290
asid);
6291
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6292
asid);
6293
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6294
asid);
6295
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6296
asid);
6297
}
6298
6299
if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6300
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6301
asid);
6302
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6303
asid);
6304
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6305
asid);
6306
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6307
asid);
6308
gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6309
asid);
6310
}
6311
6312
if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6313
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6314
asid);
6315
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6316
asid);
6317
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6318
asid);
6319
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6320
asid);
6321
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6322
asid);
6323
}
6324
6325
if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6326
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6327
asid);
6328
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6329
asid);
6330
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6331
asid);
6332
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6333
asid);
6334
gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6335
asid);
6336
}
6337
6338
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6339
gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6340
}
6341
6342
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6343
struct hl_cs_job *job)
6344
{
6345
struct packet_msg_prot *fence_pkt;
6346
u32 *fence_ptr;
6347
dma_addr_t fence_dma_addr;
6348
struct hl_cb *cb;
6349
u32 tmp, timeout, dma_offset;
6350
int rc;
6351
6352
if (hdev->pldm)
6353
timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6354
else
6355
timeout = HL_DEVICE_TIMEOUT_USEC;
6356
6357
fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6358
if (!fence_ptr) {
6359
dev_err(hdev->dev,
6360
"Failed to allocate fence memory for QMAN0\n");
6361
return -ENOMEM;
6362
}
6363
6364
cb = job->patched_cb;
6365
6366
fence_pkt = cb->kernel_address +
6367
job->job_cb_size - sizeof(struct packet_msg_prot);
6368
6369
tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6370
tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6371
tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6372
6373
fence_pkt->ctl = cpu_to_le32(tmp);
6374
fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6375
fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6376
6377
dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6378
6379
WREG32(mmDMA0_CORE_PROT + dma_offset,
6380
BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6381
6382
rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6383
job->job_cb_size, cb->bus_address);
6384
if (rc) {
6385
dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6386
goto free_fence_ptr;
6387
}
6388
6389
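/*
 * Wait for the trailing MSG_PROT to execute on QMAN0; it writes the fence
 * value into the host fence buffer that is polled here.
 */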
rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6390
(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6391
timeout, true);
6392
6393
hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6394
6395
if (rc == -ETIMEDOUT) {
6396
dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6397
goto free_fence_ptr;
6398
}
6399
6400
free_fence_ptr:
6401
WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6402
6403
hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6404
return rc;
6405
}
6406
6407
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6408
{
6409
if (event_type >= GAUDI_EVENT_SIZE)
6410
goto event_not_supported;
6411
6412
if (!gaudi_irq_map_table[event_type].valid)
6413
goto event_not_supported;
6414
6415
snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6416
6417
return;
6418
6419
event_not_supported:
6420
snprintf(desc, size, "N/A");
6421
}
6422
6423
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6424
bool is_write, u16 *engine_id_1,
6425
u16 *engine_id_2)
6426
{
6427
u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6428
6429
mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6430
DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6431
6432
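/*
 * Each DMA interface serves two DMA engines, so the initiator coordinates
 * alone are ambiguous; the per-engine error cause registers are checked
 * below to narrow down which engine issued the offending access.
 */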
switch (x_y) {
6433
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6434
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6435
dma_id[0] = 0;
6436
dma_id[1] = 2;
6437
break;
6438
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6439
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6440
dma_id[0] = 1;
6441
dma_id[1] = 3;
6442
break;
6443
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6444
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6445
dma_id[0] = 4;
6446
dma_id[1] = 6;
6447
break;
6448
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6449
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6450
dma_id[0] = 5;
6451
dma_id[1] = 7;
6452
break;
6453
default:
6454
goto unknown_initiator;
6455
}
6456
6457
for (i = 0 ; i < 2 ; i++) {
6458
dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6459
err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6460
}
6461
6462
switch (x_y) {
6463
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6464
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6465
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6466
*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6467
return "DMA0";
6468
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6469
*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6470
return "DMA2";
6471
} else {
6472
*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473
*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6474
return "DMA0 or DMA2";
6475
}
6476
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6477
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6478
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6479
*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6480
return "DMA1";
6481
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6482
*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6483
return "DMA3";
6484
} else {
6485
*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486
*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6487
return "DMA1 or DMA3";
6488
}
6489
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6490
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6491
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6492
*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6493
return "DMA4";
6494
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6495
*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6496
return "DMA6";
6497
} else {
6498
*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499
*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6500
return "DMA4 or DMA6";
6501
}
6502
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6503
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6504
if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6505
*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6506
return "DMA5";
6507
} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6508
*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6509
return "DMA7";
6510
} else {
6511
*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512
*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6513
return "DMA5 or DMA7";
6514
}
6515
}
6516
6517
unknown_initiator:
6518
return "unknown initiator";
6519
}
6520
6521
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6522
u16 *engine_id_1, u16 *engine_id_2)
6523
{
6524
u32 val, x_y, axi_id;
6525
6526
val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6527
RREG32(mmMMU_UP_RAZWI_READ_ID);
6528
x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6529
(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6530
axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6531
RAZWI_INITIATOR_AXI_ID_SHIFT);
6532
6533
switch (x_y) {
6534
case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6535
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6536
*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6537
return "TPC0";
6538
}
6539
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6540
*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6541
return "NIC0";
6542
}
6543
break;
6544
case RAZWI_INITIATOR_ID_X_Y_TPC1:
6545
*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6546
return "TPC1";
6547
case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6548
case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6549
*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6550
return "MME0";
6551
case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6552
case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6553
*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6554
return "MME1";
6555
case RAZWI_INITIATOR_ID_X_Y_TPC2:
6556
*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6557
return "TPC2";
6558
case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6559
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6560
*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6561
return "TPC3";
6562
}
6563
/* PCI, CPU or PSOC does not have engine id*/
6564
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6565
return "PCI";
6566
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6567
return "CPU";
6568
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6569
return "PSOC";
6570
break;
6571
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6572
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6573
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6574
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6575
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6576
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6577
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6578
case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6579
return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6580
engine_id_1, engine_id_2);
6581
case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6582
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6583
*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6584
return "TPC4";
6585
}
6586
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6587
*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6588
return "NIC1";
6589
}
6590
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6591
*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6592
return "NIC2";
6593
}
6594
break;
6595
case RAZWI_INITIATOR_ID_X_Y_TPC5:
6596
*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6597
return "TPC5";
6598
case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6599
case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6600
*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6601
return "MME2";
6602
case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6603
case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6604
*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6605
return "MME3";
6606
case RAZWI_INITIATOR_ID_X_Y_TPC6:
6607
*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6608
return "TPC6";
6609
case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6610
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6611
*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6612
return "TPC7";
6613
}
6614
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6615
*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6616
return "NIC4";
6617
}
6618
if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6619
*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6620
return "NIC5";
6621
}
6622
break;
6623
default:
6624
break;
6625
}
6626
6627
dev_err(hdev->dev,
6628
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6629
val,
6630
(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6631
(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6632
(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6633
RAZWI_INITIATOR_AXI_ID_MASK);
6634
6635
return "unknown initiator";
6636
}
6637
6638
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6639
u16 *engine_id_2, bool *is_read, bool *is_write)
6640
{
6641
6642
if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6643
dev_err_ratelimited(hdev->dev,
6644
"RAZWI event caused by illegal write of %s\n",
6645
gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6646
WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6647
*is_write = true;
6648
}
6649
6650
if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6651
dev_err_ratelimited(hdev->dev,
6652
"RAZWI event caused by illegal read of %s\n",
6653
gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6654
WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6655
*is_read = true;
6656
}
6657
}
6658
6659
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6660
{
6661
struct gaudi_device *gaudi = hdev->asic_specific;
6662
u32 val;
6663
6664
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6665
return;
6666
6667
val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6668
if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6669
*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6670
*addr <<= 32;
6671
*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6672
6673
dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6674
hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6675
6676
WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6677
}
6678
6679
val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6680
if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6681
*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6682
*addr <<= 32;
6683
*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6684
6685
dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6686
6687
WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6688
}
6689
}
6690
6691
/*
 *  +-------------------+------------------------------------------------------+
 *  | Configuration Reg |                     Description                      |
 *  |      Address      |                                                      |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF34 memory wrappers 63:32                           |
 *  |                   |0xF38 memory wrappers 95:64                           |
 *  |                   |0xF3C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF44 memory wrappers 63:32                           |
 *  |                   |0xF48 memory wrappers 95:64                           |
 *  |                   |0xF4C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 */
6709
static int gaudi_extract_ecc_info(struct hl_device *hdev,
6710
struct ecc_info_extract_params *params, u64 *ecc_address,
6711
u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6712
{
6713
u32 i, num_mem_regs, reg, err_bit;
6714
u64 err_addr, err_word = 0;
6715
6716
num_mem_regs = params->num_memories / 32 +
6717
((params->num_memories % 32) ? 1 : 0);
6718
6719
if (params->block_address >= CFG_BASE)
6720
params->block_address -= CFG_BASE;
6721
6722
if (params->derr)
6723
err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6724
else
6725
err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6726
6727
/* Set invalid wrapper index */
6728
*memory_wrapper_idx = 0xFF;
6729
6730
/* Iterate through memory wrappers, a single bit must be set */
6731
for (i = 0 ; i < num_mem_regs ; i++) {
6732
/* error-indication registers are consecutive, 4 bytes apart */
err_word = RREG32(err_addr + i * 4);
6734
if (err_word) {
6735
err_bit = __ffs(err_word);
6736
*memory_wrapper_idx = err_bit + (32 * i);
6737
break;
6738
}
6739
}
6740
6741
if (*memory_wrapper_idx == 0xFF) {
6742
dev_err(hdev->dev, "ECC error information cannot be found\n");
6743
return -EINVAL;
6744
}
6745
6746
WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6747
*memory_wrapper_idx);
6748
6749
*ecc_address =
6750
RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6751
*ecc_syndrom =
6752
RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6753
6754
/* Clear error indication */
6755
reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6756
if (params->derr)
6757
reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6758
else
6759
reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6760
6761
WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6762
6763
return 0;
6764
}
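/*
* Worked example for the wrapper lookup above (illustration only): if the
* first non-zero error word is the third register (i == 2) and its lowest
* set bit is bit 5, the failing memory wrapper index is 5 + (32 * 2) = 69.
* That index is then written to GAUDI_ECC_MEM_SEL_OFFSET so the address and
* syndrome registers report data for that specific wrapper.
*/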
6765
6766
/*
* gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
*
* @idx: the current pi/ci value
* @q_len: the queue length (power of 2)
*
* @return the cyclically decremented index
*/
6774
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6775
{
6776
u32 mask = q_len - 1;
6777
6778
/*
* modular decrement is equivalent to adding (q_len - 1);
* we then take the LSBs to make sure the value stays in the
* range [0, q_len - 1]
*/
6783
return (idx + q_len - 1) & mask;
6784
}
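/*
* Worked example (illustration only): with q_len = 8 the mask is 0x7, so
* gaudi_queue_idx_dec(0, 8) = (0 + 8 - 1) & 0x7 = 7 and
* gaudi_queue_idx_dec(5, 8) = (5 + 8 - 1) & 0x7 = 4, i.e. the index wraps
* cleanly from the first entry back to the last one.
*/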
6785
6786
/**
* gaudi_handle_sw_config_stream_data - print SW config stream data
*
* @hdev: pointer to the habanalabs device structure
* @stream: the QMAN's stream
* @qman_base: base address of the QMAN registers block
* @event_mask: mask of the last events that occurred
*/
6794
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6795
u64 qman_base, u64 event_mask)
6796
{
6797
u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6798
u32 cq_ptr_lo_off, size;
6799
6800
cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6801
6802
cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6803
stream * cq_ptr_lo_off;
6804
cq_ptr_hi = cq_ptr_lo +
6805
(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6806
cq_tsize = cq_ptr_lo +
6807
(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6808
6809
cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6810
size = RREG32(cq_tsize);
6811
dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6812
stream, cq_ptr, size);
6813
6814
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6815
hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6816
hdev->captured_err_info.undef_opcode.cq_size = size;
6817
hdev->captured_err_info.undef_opcode.stream_id = stream;
6818
}
6819
}
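/*
* Note (inferred from the address arithmetic above): the per-stream CQ
* registers are located by taking the TPC0 QM offsets relative to
* mmTPC0_QM_BASE and adding them to the caller-supplied qman_base plus a
* per-stream stride. This assumes every QMAN block handled here shares the
* internal register layout of TPC0's QMAN.
*/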
6820
6821
/**
* gaudi_handle_last_pqes_on_err - print last PQEs on error
*
* @hdev: pointer to the habanalabs device structure
* @qid_base: first QID of the QMAN (out of 4 streams)
* @stream: the QMAN's stream
* @qman_base: base address of the QMAN registers block
* @event_mask: mask of the last events that occurred
* @pr_sw_conf: if true, print the SW config stream data (CQ PTR and SIZE)
*/
6831
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6832
u32 stream, u64 qman_base,
6833
u64 event_mask,
6834
bool pr_sw_conf)
6835
{
6836
u32 ci, qm_ci_stream_off, queue_len;
6837
struct hl_hw_queue *q;
6838
u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6839
int i;
6840
6841
q = &hdev->kernel_queues[qid_base + stream];
6842
6843
qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6844
pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6845
stream * qm_ci_stream_off;
6846
6847
queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6848
q->int_queue_len : HL_QUEUE_LENGTH;
6849
6850
hdev->asic_funcs->hw_queues_lock(hdev);
6851
6852
if (pr_sw_conf)
6853
gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6854
6855
ci = RREG32(pq_ci);
6856
6857
/* we should start printing from ci - 1 */
ci = gaudi_queue_idx_dec(ci, queue_len);
6859
memset(addr, 0, sizeof(addr));
6860
6861
for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6862
struct hl_bd *bd;
6863
u32 len;
6864
6865
bd = q->kernel_address;
6866
bd += ci;
6867
6868
len = le32_to_cpu(bd->len);
6869
/* len 0 means an uninitialized entry - break */
if (!len)
6871
break;
6872
6873
addr[i] = le64_to_cpu(bd->ptr);
6874
6875
dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6876
stream, ci, addr[i], len);
6877
6878
/* get previous ci, wrap if needed */
6879
ci = gaudi_queue_idx_dec(ci, queue_len);
6880
}
6881
6882
if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6883
struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6884
u32 arr_idx = undef_opcode->cb_addr_streams_len;
6885
6886
if (arr_idx == 0) {
6887
undef_opcode->timestamp = ktime_get();
6888
undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6889
}
6890
6891
memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6892
undef_opcode->cb_addr_streams_len++;
6893
}
6894
6895
hdev->asic_funcs->hw_queues_unlock(hdev);
6896
}
6897
6898
/**
* handle_qman_data_on_err - extract QMAN data on error
*
* @hdev: pointer to the habanalabs device structure
* @qid_base: first QID of the QMAN (out of 4 streams)
* @stream: the QMAN's stream
* @qman_base: base address of the QMAN registers block
* @event_mask: mask of the last events that occurred
*
* This function attempts to extract as much data as possible on a QMAN error.
* For an upper CP, print the SW config stream data and the last 8 PQEs.
* For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
*/
6911
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6912
u32 stream, u64 qman_base, u64 event_mask)
6913
{
6914
u32 i;
6915
6916
if (stream != QMAN_STREAMS) {
6917
gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6918
qman_base, event_mask, true);
6919
return;
6920
}
6921
6922
/* handle Lower-CP */
6923
gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6924
6925
for (i = 0; i < QMAN_STREAMS; i++)
6926
gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6927
qman_base, event_mask, false);
6928
}
6929
6930
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6931
const char *qm_name,
6932
u64 qman_base,
6933
u32 qid_base,
6934
u64 *event_mask)
6935
{
6936
u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6937
u64 glbl_sts_addr, arb_err_addr;
6938
char reg_desc[32];
6939
6940
glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6941
arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6942
6943
/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6944
for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6945
glbl_sts_clr_val = 0;
6946
glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6947
6948
if (!glbl_sts_val)
6949
continue;
6950
6951
if (i == QMAN_STREAMS)
6952
snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6953
else
6954
snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6955
6956
for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6957
if (glbl_sts_val & BIT(j)) {
6958
dev_err_ratelimited(hdev->dev,
6959
"%s %s. err cause: %s\n",
6960
qm_name, reg_desc,
6961
gaudi_qman_error_cause[j]);
6962
glbl_sts_clr_val |= BIT(j);
6963
}
6964
}
6965
/* check for undefined opcode */
6966
if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6967
hdev->captured_err_info.undef_opcode.write_enable) {
6968
memset(&hdev->captured_err_info.undef_opcode, 0,
6969
sizeof(hdev->captured_err_info.undef_opcode));
6970
6971
hdev->captured_err_info.undef_opcode.write_enable = false;
6972
*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6973
}
6974
6975
/* Write 1 to clear errors */
if (!hdev->stop_on_err)
6977
WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6978
else
6979
handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6980
}
6981
6982
arb_err_val = RREG32(arb_err_addr);
6983
6984
if (!arb_err_val)
6985
return;
6986
6987
for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6988
if (arb_err_val & BIT(j)) {
6989
dev_err_ratelimited(hdev->dev,
6990
"%s ARB_ERR. err cause: %s\n",
6991
qm_name,
6992
gaudi_qman_arb_error_cause[j]);
6993
}
6994
}
6995
}
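/*
* Illustrative note: the GLBL_STS1 loop above reads QMAN_STREAMS + 1
* consecutive registers (4 bytes apart) - one per upper-CP stream and a
* final one (i == QMAN_STREAMS) for the lower CP, which is why the last
* iteration is labeled "LowerCP" rather than "stream%u".
*/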
6996
6997
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
6998
struct hl_eq_sm_sei_data *sei_data)
6999
{
7000
u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7001
7002
/* Flip the bits as the enum is ordered in the opposite way */
7003
index = (index ^ 0x3) & 0x3;
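/* i.e. index 0 becomes 3, 1 becomes 2, 2 becomes 1 and 3 becomes 0 */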
7004
7005
switch (sei_data->sei_cause) {
7006
case SM_SEI_SO_OVERFLOW:
7007
dev_err_ratelimited(hdev->dev,
7008
"%s SEI Error: SOB Group %u overflow/underflow",
7009
gaudi_sync_manager_names[index],
7010
le32_to_cpu(sei_data->sei_log));
7011
break;
7012
case SM_SEI_LBW_4B_UNALIGNED:
7013
dev_err_ratelimited(hdev->dev,
7014
"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7015
gaudi_sync_manager_names[index],
7016
le32_to_cpu(sei_data->sei_log));
7017
break;
7018
case SM_SEI_AXI_RESPONSE_ERR:
7019
dev_err_ratelimited(hdev->dev,
7020
"%s SEI Error: AXI ID %u response error",
7021
gaudi_sync_manager_names[index],
7022
le32_to_cpu(sei_data->sei_log));
7023
break;
7024
default:
7025
dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7026
le32_to_cpu(sei_data->sei_log));
7027
break;
7028
}
7029
}
7030
7031
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7032
struct hl_eq_ecc_data *ecc_data)
7033
{
7034
struct ecc_info_extract_params params;
7035
u64 ecc_address = 0, ecc_syndrom = 0;
7036
u8 index, memory_wrapper_idx = 0;
7037
bool extract_info_from_fw;
7038
int rc;
7039
7040
if (hdev->asic_prop.fw_security_enabled) {
7041
extract_info_from_fw = true;
7042
goto extract_ecc_info;
7043
}
7044
7045
switch (event_type) {
7046
case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7047
case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7048
extract_info_from_fw = true;
7049
break;
7050
case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7051
index = event_type - GAUDI_EVENT_TPC0_SERR;
7052
params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7053
params.num_memories = 90;
7054
params.derr = false;
7055
extract_info_from_fw = false;
7056
break;
7057
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7058
index = event_type - GAUDI_EVENT_TPC0_DERR;
7059
params.block_address =
7060
mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7061
params.num_memories = 90;
7062
params.derr = true;
7063
extract_info_from_fw = false;
7064
break;
7065
case GAUDI_EVENT_MME0_ACC_SERR:
7066
case GAUDI_EVENT_MME1_ACC_SERR:
7067
case GAUDI_EVENT_MME2_ACC_SERR:
7068
case GAUDI_EVENT_MME3_ACC_SERR:
7069
index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7070
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7071
params.num_memories = 128;
7072
params.derr = false;
7073
extract_info_from_fw = false;
7074
break;
7075
case GAUDI_EVENT_MME0_ACC_DERR:
7076
case GAUDI_EVENT_MME1_ACC_DERR:
7077
case GAUDI_EVENT_MME2_ACC_DERR:
7078
case GAUDI_EVENT_MME3_ACC_DERR:
7079
index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7080
params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7081
params.num_memories = 128;
7082
params.derr = true;
7083
extract_info_from_fw = false;
7084
break;
7085
case GAUDI_EVENT_MME0_SBAB_SERR:
7086
case GAUDI_EVENT_MME1_SBAB_SERR:
7087
case GAUDI_EVENT_MME2_SBAB_SERR:
7088
case GAUDI_EVENT_MME3_SBAB_SERR:
7089
index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7090
params.block_address =
7091
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7092
params.num_memories = 33;
7093
params.derr = false;
7094
extract_info_from_fw = false;
7095
break;
7096
case GAUDI_EVENT_MME0_SBAB_DERR:
7097
case GAUDI_EVENT_MME1_SBAB_DERR:
7098
case GAUDI_EVENT_MME2_SBAB_DERR:
7099
case GAUDI_EVENT_MME3_SBAB_DERR:
7100
index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7101
params.block_address =
7102
mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7103
params.num_memories = 33;
7104
params.derr = true;
7105
extract_info_from_fw = false;
7106
break;
7107
default:
7108
return;
7109
}
7110
7111
extract_ecc_info:
7112
if (extract_info_from_fw) {
7113
ecc_address = le64_to_cpu(ecc_data->ecc_address);
7114
ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7115
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7116
} else {
7117
rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7118
&ecc_syndrom, &memory_wrapper_idx);
7119
if (rc)
7120
return;
7121
}
7122
7123
dev_err(hdev->dev,
7124
"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7125
ecc_address, ecc_syndrom, memory_wrapper_idx);
7126
}
7127
7128
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7129
{
7130
u64 qman_base;
7131
char desc[32];
7132
u32 qid_base;
7133
u8 index;
7134
7135
switch (event_type) {
7136
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7137
index = event_type - GAUDI_EVENT_TPC0_QM;
7138
qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7139
qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7140
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7141
break;
7142
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7143
if (event_type == GAUDI_EVENT_MME0_QM) {
7144
index = 0;
7145
qid_base = GAUDI_QUEUE_ID_MME_0_0;
7146
} else { /* event_type == GAUDI_EVENT_MME2_QM */
7147
index = 2;
7148
qid_base = GAUDI_QUEUE_ID_MME_1_0;
7149
}
7150
qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7151
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7152
break;
7153
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7154
index = event_type - GAUDI_EVENT_DMA0_QM;
7155
qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7156
/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7157
if (index > 1)
7158
qid_base++;
7159
qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7160
snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7161
break;
7162
case GAUDI_EVENT_NIC0_QM0:
7163
qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7164
qman_base = mmNIC0_QM0_BASE;
7165
snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7166
break;
7167
case GAUDI_EVENT_NIC0_QM1:
7168
qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7169
qman_base = mmNIC0_QM1_BASE;
7170
snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7171
break;
7172
case GAUDI_EVENT_NIC1_QM0:
7173
qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7174
qman_base = mmNIC1_QM0_BASE;
7175
snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7176
break;
7177
case GAUDI_EVENT_NIC1_QM1:
7178
qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7179
qman_base = mmNIC1_QM1_BASE;
7180
snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7181
break;
7182
case GAUDI_EVENT_NIC2_QM0:
7183
qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7184
qman_base = mmNIC2_QM0_BASE;
7185
snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7186
break;
7187
case GAUDI_EVENT_NIC2_QM1:
7188
qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7189
qman_base = mmNIC2_QM1_BASE;
7190
snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7191
break;
7192
case GAUDI_EVENT_NIC3_QM0:
7193
qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7194
qman_base = mmNIC3_QM0_BASE;
7195
snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7196
break;
7197
case GAUDI_EVENT_NIC3_QM1:
7198
qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7199
qman_base = mmNIC3_QM1_BASE;
7200
snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7201
break;
7202
case GAUDI_EVENT_NIC4_QM0:
7203
qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7204
qman_base = mmNIC4_QM0_BASE;
7205
snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7206
break;
7207
case GAUDI_EVENT_NIC4_QM1:
7208
qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7209
qman_base = mmNIC4_QM1_BASE;
7210
snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7211
break;
7212
default:
7213
return;
7214
}
7215
7216
gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7217
}
7218
7219
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7220
bool check_razwi, u64 *event_mask)
7221
{
7222
bool is_read = false, is_write = false;
7223
u16 engine_id[2], num_of_razwi_eng = 0;
7224
char desc[64] = "";
7225
u64 razwi_addr = 0;
7226
u8 razwi_flags = 0;
7227
7228
/*
7229
* Initialize the engine IDs as not valid by default; they get a valid value
* only if the RAZWI was initiated by an engine that has an engine ID.
*/
7232
engine_id[0] = HL_RAZWI_NA_ENG_ID;
7233
engine_id[1] = HL_RAZWI_NA_ENG_ID;
7234
7235
gaudi_get_event_desc(event_type, desc, sizeof(desc));
7236
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7237
event_type, desc);
7238
7239
if (check_razwi) {
7240
gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7241
&is_write);
7242
gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7243
7244
if (is_read)
7245
razwi_flags |= HL_RAZWI_READ;
7246
if (is_write)
7247
razwi_flags |= HL_RAZWI_WRITE;
7248
7249
if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7250
if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7251
num_of_razwi_eng = 2;
7252
else
7253
num_of_razwi_eng = 1;
7254
}
7255
7256
if (razwi_flags)
7257
hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7258
razwi_flags, event_mask);
7259
}
7260
}
7261
7262
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7263
struct cpucp_pkt_sync_err *sync_err)
7264
{
7265
struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7266
7267
dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7268
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7269
}
7270
7271
static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7272
struct hl_eq_fw_alive *fw_alive)
7273
{
7274
dev_err(hdev->dev,
7275
"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7276
(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7277
le32_to_cpu(fw_alive->process_id),
7278
le32_to_cpu(fw_alive->thread_id),
7279
le64_to_cpu(fw_alive->uptime_seconds));
7280
}
7281
7282
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7283
void *data)
7284
{
7285
char desc[64] = "", *type;
7286
struct eq_nic_sei_event *eq_nic_sei = data;
7287
u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7288
7289
switch (eq_nic_sei->axi_error_cause) {
7290
case RXB:
7291
type = "RXB";
7292
break;
7293
case RXE:
7294
type = "RXE";
7295
break;
7296
case TXS:
7297
type = "TXS";
7298
break;
7299
case TXE:
7300
type = "TXE";
7301
break;
7302
case QPC_RESP:
7303
type = "QPC_RESP";
7304
break;
7305
case NON_AXI_ERR:
7306
type = "NON_AXI_ERR";
7307
break;
7308
case TMR:
7309
type = "TMR";
7310
break;
7311
default:
7312
dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7313
eq_nic_sei->axi_error_cause);
7314
type = "N/A";
7315
break;
7316
}
7317
7318
snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7319
eq_nic_sei->id);
7320
dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7321
event_type, desc);
7322
}
7323
7324
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7325
{
7326
/* GAUDI doesn't support any reset except hard-reset */
7327
return -EPERM;
7328
}
7329
7330
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7331
struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7332
{
7333
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7334
int rc = 0;
7335
7336
if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7337
CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7338
if (!hbm_ecc_data) {
7339
dev_err(hdev->dev, "No FW ECC data");
7340
return 0;
7341
}
7342
7343
wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7344
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7345
rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7346
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7347
ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7348
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7349
derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7350
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351
serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7352
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353
type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7354
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355
ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7356
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357
7358
dev_err(hdev->dev,
7359
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7360
device, ch, wr_par, rd_par, ca_par, serr, derr);
7361
dev_err(hdev->dev,
7362
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7363
device, ch, hbm_ecc_data->first_addr, type,
7364
hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7365
hbm_ecc_data->dec_cnt);
7366
return 0;
7367
}
7368
7369
if (hdev->asic_prop.fw_security_enabled) {
7370
dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7371
return 0;
7372
}
7373
7374
base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7375
for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7376
val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7377
val = (val & 0xFF) | ((val >> 8) & 0xFF);
7378
if (val) {
7379
rc = -EIO;
7380
dev_err(hdev->dev,
7381
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7382
device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7383
(val >> 2) & 0x1, (val >> 3) & 0x1,
7384
(val >> 4) & 0x1);
7385
7386
val2 = RREG32(base + ch * 0x1000 + 0x060);
7387
dev_err(hdev->dev,
7388
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7389
device, ch * 2,
7390
RREG32(base + ch * 0x1000 + 0x064),
7391
(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7392
(val2 & 0xFF0000) >> 16,
7393
(val2 & 0xFF000000) >> 24);
7394
}
7395
7396
val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7397
val = (val & 0xFF) | ((val >> 8) & 0xFF);
7398
if (val) {
7399
rc = -EIO;
7400
dev_err(hdev->dev,
7401
"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7402
device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7403
(val >> 2) & 0x1, (val >> 3) & 0x1,
7404
(val >> 4) & 0x1);
7405
7406
val2 = RREG32(base + ch * 0x1000 + 0x070);
7407
dev_err(hdev->dev,
7408
"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7409
device, ch * 2 + 1,
7410
RREG32(base + ch * 0x1000 + 0x074),
7411
(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7412
(val2 & 0xFF0000) >> 16,
7413
(val2 & 0xFF000000) >> 24);
7414
}
7415
7416
/* Clear interrupts */
7417
RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7418
RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7419
WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7420
WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7421
RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7422
RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7423
}
7424
7425
val = RREG32(base + 0x8F30);
7426
val2 = RREG32(base + 0x8F34);
7427
if (val | val2) {
7428
rc = -EIO;
7429
dev_err(hdev->dev,
7430
"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7431
device, val, val2);
7432
}
7433
val = RREG32(base + 0x8F40);
7434
val2 = RREG32(base + 0x8F44);
7435
if (val | val2) {
7436
rc = -EIO;
7437
dev_err(hdev->dev,
7438
"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7439
device, val, val2);
7440
}
7441
7442
return rc;
7443
}
7444
7445
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7446
{
7447
switch (hbm_event_type) {
7448
case GAUDI_EVENT_HBM0_SPI_0:
7449
case GAUDI_EVENT_HBM0_SPI_1:
7450
return 0;
7451
case GAUDI_EVENT_HBM1_SPI_0:
7452
case GAUDI_EVENT_HBM1_SPI_1:
7453
return 1;
7454
case GAUDI_EVENT_HBM2_SPI_0:
7455
case GAUDI_EVENT_HBM2_SPI_1:
7456
return 2;
7457
case GAUDI_EVENT_HBM3_SPI_0:
7458
case GAUDI_EVENT_HBM3_SPI_1:
7459
return 3;
7460
default:
7461
break;
7462
}
7463
7464
/* Should never happen */
7465
return 0;
7466
}
7467
7468
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7469
char *interrupt_name)
7470
{
7471
u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7472
bool soft_reset_required = false;
7473
7474
tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7475
TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7476
7477
for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7478
if (tpc_interrupts_cause & BIT(i)) {
7479
dev_err_ratelimited(hdev->dev,
7480
"TPC%d_%s interrupt cause: %s\n",
7481
tpc_id, interrupt_name,
7482
gaudi_tpc_interrupts_cause[i]);
7483
/* If this is a QM error, we need to soft-reset */
if (i == 15)
7485
soft_reset_required = true;
7486
}
7487
7488
/* Clear interrupts */
7489
WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7490
7491
return soft_reset_required;
7492
}
7493
7494
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7495
{
7496
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7497
}
7498
7499
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7500
{
7501
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7502
}
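/*
* Note (inferred only from the divisors above): the event enumeration
* appears to space TPC DEC events 2 entries apart and TPC KRN_ERR events
* 6 entries apart per TPC, hence the ">> 1" and "/ 6" conversions back to
* a TPC index.
*/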
7503
7504
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7505
{
7506
ktime_t zero_time = ktime_set(0, 0);
7507
7508
mutex_lock(&hdev->clk_throttling.lock);
7509
7510
switch (event_type) {
7511
case GAUDI_EVENT_FIX_POWER_ENV_S:
7512
hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7513
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7514
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7515
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7516
dev_info_ratelimited(hdev->dev,
7517
"Clock throttling due to power consumption\n");
7518
break;
7519
7520
case GAUDI_EVENT_FIX_POWER_ENV_E:
7521
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7522
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7523
dev_info_ratelimited(hdev->dev,
7524
"Power envelop is safe, back to optimal clock\n");
7525
break;
7526
7527
case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7528
hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7529
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7530
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7531
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7532
*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7533
dev_info_ratelimited(hdev->dev,
7534
"Clock throttling due to overheating\n");
7535
break;
7536
7537
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7538
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7539
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7540
*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7541
dev_info_ratelimited(hdev->dev,
7542
"Thermal envelop is safe, back to optimal clock\n");
7543
break;
7544
7545
default:
7546
dev_err(hdev->dev, "Received invalid clock change event %d\n",
7547
event_type);
7548
break;
7549
}
7550
7551
mutex_unlock(&hdev->clk_throttling.lock);
7552
}
7553
7554
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7555
{
7556
struct gaudi_device *gaudi = hdev->asic_specific;
7557
struct hl_info_fw_err_info fw_err_info;
7558
u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7559
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7560
u32 fw_fatal_err_flag = 0, flags = 0;
7561
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7562
>> EQ_CTL_EVENT_TYPE_SHIFT);
7563
bool reset_required, reset_direct = false;
7564
u8 cause;
7565
int rc;
7566
7567
if (event_type >= GAUDI_EVENT_SIZE) {
7568
dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7569
event_type, GAUDI_EVENT_SIZE - 1);
7570
return;
7571
}
7572
7573
gaudi->events_stat[event_type]++;
7574
gaudi->events_stat_aggregate[event_type]++;
7575
7576
switch (event_type) {
7577
case GAUDI_EVENT_PCIE_CORE_DERR:
7578
case GAUDI_EVENT_PCIE_IF_DERR:
7579
case GAUDI_EVENT_PCIE_PHY_DERR:
7580
case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7581
case GAUDI_EVENT_MME0_ACC_DERR:
7582
case GAUDI_EVENT_MME0_SBAB_DERR:
7583
case GAUDI_EVENT_MME1_ACC_DERR:
7584
case GAUDI_EVENT_MME1_SBAB_DERR:
7585
case GAUDI_EVENT_MME2_ACC_DERR:
7586
case GAUDI_EVENT_MME2_SBAB_DERR:
7587
case GAUDI_EVENT_MME3_ACC_DERR:
7588
case GAUDI_EVENT_MME3_SBAB_DERR:
7589
case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7590
fallthrough;
7591
case GAUDI_EVENT_CPU_IF_ECC_DERR:
7592
case GAUDI_EVENT_PSOC_MEM_DERR:
7593
case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7594
case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7595
case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7596
case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7597
case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7598
case GAUDI_EVENT_MMU_DERR:
7599
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7600
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7601
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7602
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7603
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7604
goto reset_device;
7605
7606
case GAUDI_EVENT_GIC500:
7607
case GAUDI_EVENT_AXI_ECC:
7608
case GAUDI_EVENT_L2_RAM_ECC:
7609
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7610
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7611
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7612
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7613
goto reset_device;
7614
7615
case GAUDI_EVENT_HBM0_SPI_0:
7616
case GAUDI_EVENT_HBM1_SPI_0:
7617
case GAUDI_EVENT_HBM2_SPI_0:
7618
case GAUDI_EVENT_HBM3_SPI_0:
7619
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7620
gaudi_hbm_read_interrupts(hdev,
7621
gaudi_hbm_event_to_dev(event_type),
7622
&eq_entry->hbm_ecc_data);
7623
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7624
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7625
goto reset_device;
7626
7627
case GAUDI_EVENT_HBM0_SPI_1:
7628
case GAUDI_EVENT_HBM1_SPI_1:
7629
case GAUDI_EVENT_HBM2_SPI_1:
7630
case GAUDI_EVENT_HBM3_SPI_1:
7631
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7632
gaudi_hbm_read_interrupts(hdev,
7633
gaudi_hbm_event_to_dev(event_type),
7634
&eq_entry->hbm_ecc_data);
7635
hl_fw_unmask_irq(hdev, event_type);
7636
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7637
break;
7638
7639
case GAUDI_EVENT_TPC0_DEC:
7640
case GAUDI_EVENT_TPC1_DEC:
7641
case GAUDI_EVENT_TPC2_DEC:
7642
case GAUDI_EVENT_TPC3_DEC:
7643
case GAUDI_EVENT_TPC4_DEC:
7644
case GAUDI_EVENT_TPC5_DEC:
7645
case GAUDI_EVENT_TPC6_DEC:
7646
case GAUDI_EVENT_TPC7_DEC:
7647
/* On a TPC DEC event, notify on a TPC assertion. Since there isn't
* a specific event for assertions yet, the FW generates a TPC DEC event.
* The SW upper layer will inspect an internally mapped area to determine
* whether the event is a TPC assertion or a "real" TPC DEC.
*/
7652
event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7653
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7654
reset_required = gaudi_tpc_read_interrupts(hdev,
7655
tpc_dec_event_to_tpc_id(event_type),
7656
"AXI_SLV_DEC_Error");
7657
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7658
if (reset_required) {
7659
dev_err(hdev->dev, "reset required due to %s\n",
7660
gaudi_irq_map_table[event_type].name);
7661
7662
reset_direct = true;
7663
goto reset_device;
7664
} else {
7665
hl_fw_unmask_irq(hdev, event_type);
7666
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7667
}
7668
break;
7669
7670
case GAUDI_EVENT_TPC0_KRN_ERR:
7671
case GAUDI_EVENT_TPC1_KRN_ERR:
7672
case GAUDI_EVENT_TPC2_KRN_ERR:
7673
case GAUDI_EVENT_TPC3_KRN_ERR:
7674
case GAUDI_EVENT_TPC4_KRN_ERR:
7675
case GAUDI_EVENT_TPC5_KRN_ERR:
7676
case GAUDI_EVENT_TPC6_KRN_ERR:
7677
case GAUDI_EVENT_TPC7_KRN_ERR:
7678
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7679
reset_required = gaudi_tpc_read_interrupts(hdev,
7680
tpc_krn_event_to_tpc_id(event_type),
7681
"KRN_ERR");
7682
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7683
if (reset_required) {
7684
dev_err(hdev->dev, "reset required due to %s\n",
7685
gaudi_irq_map_table[event_type].name);
7686
7687
reset_direct = true;
7688
goto reset_device;
7689
} else {
7690
hl_fw_unmask_irq(hdev, event_type);
7691
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7692
}
7693
break;
7694
7695
case GAUDI_EVENT_PCIE_CORE_SERR:
7696
case GAUDI_EVENT_PCIE_IF_SERR:
7697
case GAUDI_EVENT_PCIE_PHY_SERR:
7698
case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7699
case GAUDI_EVENT_MME0_ACC_SERR:
7700
case GAUDI_EVENT_MME0_SBAB_SERR:
7701
case GAUDI_EVENT_MME1_ACC_SERR:
7702
case GAUDI_EVENT_MME1_SBAB_SERR:
7703
case GAUDI_EVENT_MME2_ACC_SERR:
7704
case GAUDI_EVENT_MME2_SBAB_SERR:
7705
case GAUDI_EVENT_MME3_ACC_SERR:
7706
case GAUDI_EVENT_MME3_SBAB_SERR:
7707
case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7708
case GAUDI_EVENT_CPU_IF_ECC_SERR:
7709
case GAUDI_EVENT_PSOC_MEM_SERR:
7710
case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7711
case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7712
case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7713
case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7714
case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7715
fallthrough;
7716
case GAUDI_EVENT_MMU_SERR:
7717
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7718
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7719
hl_fw_unmask_irq(hdev, event_type);
7720
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7721
break;
7722
7723
case GAUDI_EVENT_PCIE_DEC:
7724
case GAUDI_EVENT_CPU_AXI_SPLITTER:
7725
case GAUDI_EVENT_PSOC_AXI_DEC:
7726
case GAUDI_EVENT_PSOC_PRSTN_FALL:
7727
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7728
hl_fw_unmask_irq(hdev, event_type);
7729
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730
break;
7731
7732
case GAUDI_EVENT_MMU_PAGE_FAULT:
7733
case GAUDI_EVENT_MMU_WR_PERM:
7734
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735
hl_fw_unmask_irq(hdev, event_type);
7736
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7737
break;
7738
7739
case GAUDI_EVENT_MME0_WBC_RSP:
7740
case GAUDI_EVENT_MME0_SBAB0_RSP:
7741
case GAUDI_EVENT_MME1_WBC_RSP:
7742
case GAUDI_EVENT_MME1_SBAB0_RSP:
7743
case GAUDI_EVENT_MME2_WBC_RSP:
7744
case GAUDI_EVENT_MME2_SBAB0_RSP:
7745
case GAUDI_EVENT_MME3_WBC_RSP:
7746
case GAUDI_EVENT_MME3_SBAB0_RSP:
7747
case GAUDI_EVENT_RAZWI_OR_ADC:
7748
case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7749
case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7750
fallthrough;
7751
case GAUDI_EVENT_NIC0_QM0:
7752
case GAUDI_EVENT_NIC0_QM1:
7753
case GAUDI_EVENT_NIC1_QM0:
7754
case GAUDI_EVENT_NIC1_QM1:
7755
case GAUDI_EVENT_NIC2_QM0:
7756
case GAUDI_EVENT_NIC2_QM1:
7757
case GAUDI_EVENT_NIC3_QM0:
7758
case GAUDI_EVENT_NIC3_QM1:
7759
case GAUDI_EVENT_NIC4_QM0:
7760
case GAUDI_EVENT_NIC4_QM1:
7761
case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7762
case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7763
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7764
gaudi_handle_qman_err(hdev, event_type, &event_mask);
7765
hl_fw_unmask_irq(hdev, event_type);
7766
event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7767
break;
7768
7769
case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7770
gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7771
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7772
goto reset_device;
7773
7774
case GAUDI_EVENT_TPC0_BMON_SPMU:
7775
case GAUDI_EVENT_TPC1_BMON_SPMU:
7776
case GAUDI_EVENT_TPC2_BMON_SPMU:
7777
case GAUDI_EVENT_TPC3_BMON_SPMU:
7778
case GAUDI_EVENT_TPC4_BMON_SPMU:
7779
case GAUDI_EVENT_TPC5_BMON_SPMU:
7780
case GAUDI_EVENT_TPC6_BMON_SPMU:
7781
case GAUDI_EVENT_TPC7_BMON_SPMU:
7782
case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7783
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7784
hl_fw_unmask_irq(hdev, event_type);
7785
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7786
break;
7787
7788
case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7789
gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7790
hl_fw_unmask_irq(hdev, event_type);
7791
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792
break;
7793
7794
case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7795
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7796
gaudi_print_sm_sei_info(hdev, event_type,
7797
&eq_entry->sm_sei_data);
7798
rc = hl_state_dump(hdev);
7799
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7800
if (rc)
7801
dev_err(hdev->dev,
7802
"Error during system state dump %d\n", rc);
7803
hl_fw_unmask_irq(hdev, event_type);
7804
break;
7805
7806
case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7807
break;
7808
7809
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7810
gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7811
hl_fw_unmask_irq(hdev, event_type);
7812
break;
7813
7814
case GAUDI_EVENT_PSOC_GPIO_U16_0:
7815
cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7816
dev_err(hdev->dev,
7817
"Received high temp H/W interrupt %d (cause %d)\n",
7818
event_type, cause);
7819
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7820
break;
7821
7822
case GAUDI_EVENT_DEV_RESET_REQ:
7823
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7824
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7825
goto reset_device;
7826
7827
case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7828
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7829
gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7830
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831
goto reset_device;
7832
7833
case GAUDI_EVENT_FW_ALIVE_S:
7834
gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835
gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7836
fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7837
fw_err_info.event_id = event_type;
7838
fw_err_info.event_mask = &event_mask;
7839
hl_handle_fw_err(hdev, &fw_err_info);
7840
goto reset_device;
7841
7842
default:
7843
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7844
event_type);
7845
break;
7846
}
7847
7848
if (event_mask)
7849
hl_notifier_event_send_all(hdev, event_mask);
7850
7851
return;
7852
7853
reset_device:
7854
reset_required = true;
7855
7856
if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7857
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7858
7859
/* notify on device unavailable while the reset is triggered by FW */
7860
event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7861
HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7862
} else if (hdev->hard_reset_on_fw_events) {
7863
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7864
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7865
} else {
7866
reset_required = false;
7867
}
7868
7869
if (reset_required) {
7870
/* escalate general hw errors to critical/fatal error */
7871
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7872
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7873
7874
hl_device_cond_reset(hdev, flags, event_mask);
7875
} else {
7876
hl_fw_unmask_irq(hdev, event_type);
7877
/* A notification on the event still needs to be sent even though no reset is executed */
7878
if (event_mask)
7879
hl_notifier_event_send_all(hdev, event_mask);
7880
}
7881
}
7882
7883
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7884
{
7885
struct gaudi_device *gaudi = hdev->asic_specific;
7886
7887
if (aggregate) {
7888
*size = (u32) sizeof(gaudi->events_stat_aggregate);
7889
return gaudi->events_stat_aggregate;
7890
}
7891
7892
*size = (u32) sizeof(gaudi->events_stat);
7893
return gaudi->events_stat;
7894
}
7895
7896
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7897
{
7898
struct gaudi_device *gaudi = hdev->asic_specific;
7899
u32 status, timeout_usec;
7900
int rc;
7901
7902
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7903
hdev->reset_info.hard_reset_pending)
7904
return 0;
7905
7906
if (hdev->pldm)
7907
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7908
else
7909
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7910
7911
/* L0 & L1 invalidation */
7912
WREG32(mmSTLB_INV_PS, 3);
7913
WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7914
WREG32(mmSTLB_INV_PS, 2);
7915
7916
rc = hl_poll_timeout(
7917
hdev,
7918
mmSTLB_INV_PS,
7919
status,
7920
!status,
7921
1000,
7922
timeout_usec);
7923
7924
WREG32(mmSTLB_INV_SET, 0);
7925
7926
return rc;
7927
}
7928
7929
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7930
bool is_hard, u32 flags,
7931
u32 asid, u64 va, u64 size)
7932
{
7933
/* Treat as invalidate all because there is no range invalidation
7934
* in Gaudi
7935
*/
7936
return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7937
}
7938
7939
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7940
{
7941
u32 status, timeout_usec;
7942
int rc;
7943
7944
if (hdev->pldm)
7945
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7946
else
7947
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7948
7949
WREG32(MMU_ASID, asid);
7950
WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7951
WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7952
WREG32(MMU_BUSY, 0x80000000);
7953
7954
rc = hl_poll_timeout(
7955
hdev,
7956
MMU_BUSY,
7957
status,
7958
!(status & 0x80000000),
7959
1000,
7960
timeout_usec);
7961
7962
if (rc) {
7963
dev_err(hdev->dev,
7964
"Timeout during MMU hop0 config of asid %d\n", asid);
7965
return rc;
7966
}
7967
7968
return 0;
7969
}
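/*
* Note (based on the register names above, not on documentation): the hop0
* page-table physical address appears to be programmed as two fields, bits
* 43:12 and bits 49:44, and the poll on MMU_BUSY waits for bit 31 to clear
* before considering the new ASID mapping latched.
*/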
7970
7971
static int gaudi_send_heartbeat(struct hl_device *hdev)
7972
{
7973
struct gaudi_device *gaudi = hdev->asic_specific;
7974
7975
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7976
return 0;
7977
7978
return hl_fw_send_heartbeat(hdev);
7979
}
7980
7981
static int gaudi_cpucp_info_get(struct hl_device *hdev)
7982
{
7983
struct gaudi_device *gaudi = hdev->asic_specific;
7984
struct asic_fixed_properties *prop = &hdev->asic_prop;
7985
int rc;
7986
7987
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7988
return 0;
7989
7990
rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7991
mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7992
mmCPU_BOOT_ERR1);
7993
if (rc)
7994
return rc;
7995
7996
if (!strlen(prop->cpucp_info.card_name))
7997
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7998
CARD_NAME_MAX_LEN);
7999
8000
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8001
8002
set_default_power_values(hdev);
8003
8004
return 0;
8005
}
8006
8007
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8008
struct engines_data *e)
8009
{
8010
struct gaudi_device *gaudi = hdev->asic_specific;
8011
const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8012
const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8013
const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8014
unsigned long *mask = (unsigned long *)mask_arr;
8015
u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8016
bool is_idle = true, is_eng_idle, is_slave;
8017
u64 offset;
8018
int i, dma_id, port;
8019
8020
if (e)
8021
hl_engine_data_sprintf(e,
8022
"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8023
"--- ------- ------------ ---------- -------------\n");
8024
8025
for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8026
dma_id = gaudi_dma_assignment[i];
8027
offset = dma_id * DMA_QMAN_OFFSET;
8028
8029
qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8030
qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8031
dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8032
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8033
IS_DMA_IDLE(dma_core_sts0);
8034
is_idle &= is_eng_idle;
8035
8036
if (mask && !is_eng_idle)
8037
set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8038
if (e)
8039
hl_engine_data_sprintf(e, fmt, dma_id,
8040
is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8041
qm_cgm_sts, dma_core_sts0);
8042
}
8043
8044
if (e)
8045
hl_engine_data_sprintf(e,
8046
"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8047
"--- ------- ------------ ---------- ----------\n");
8048
8049
for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8050
offset = i * TPC_QMAN_OFFSET;
8051
qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8052
qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8053
tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8054
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8055
IS_TPC_IDLE(tpc_cfg_sts);
8056
is_idle &= is_eng_idle;
8057
8058
if (mask && !is_eng_idle)
8059
set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8060
if (e)
8061
hl_engine_data_sprintf(e, fmt, i,
8062
is_eng_idle ? "Y" : "N",
8063
qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8064
}
8065
8066
if (e)
8067
hl_engine_data_sprintf(e,
8068
"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8069
"--- ------- ------------ ---------- -----------\n");
8070
8071
for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8072
offset = i * MME_QMAN_OFFSET;
8073
mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8074
is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8075
8076
/* MME 1 & 3 are slaves, no need to check their QMANs */
8077
is_slave = i % 2;
8078
if (!is_slave) {
8079
qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8080
qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8081
is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8082
}
8083
8084
is_idle &= is_eng_idle;
8085
8086
if (mask && !is_eng_idle)
8087
set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8088
if (e) {
8089
if (!is_slave)
8090
hl_engine_data_sprintf(e, fmt, i,
8091
is_eng_idle ? "Y" : "N",
8092
qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8093
else
8094
hl_engine_data_sprintf(e, mme_slave_fmt, i,
8095
is_eng_idle ? "Y" : "N", "-",
8096
"-", mme_arch_sts);
8097
}
8098
}
8099
8100
if (e)
8101
hl_engine_data_sprintf(e,
8102
"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8103
"--- ------- ------------ ----------\n");
8104
8105
for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8106
offset = i * NIC_MACRO_QMAN_OFFSET;
8107
port = 2 * i;
8108
if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8109
qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8110
qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8111
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8112
is_idle &= is_eng_idle;
8113
8114
if (mask && !is_eng_idle)
8115
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8116
if (e)
8117
hl_engine_data_sprintf(e, nic_fmt, port,
8118
is_eng_idle ? "Y" : "N",
8119
qm_glbl_sts0, qm_cgm_sts);
8120
}
8121
8122
port = 2 * i + 1;
8123
if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8124
qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8125
qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8126
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8127
is_idle &= is_eng_idle;
8128
8129
if (mask && !is_eng_idle)
8130
set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8131
if (e)
8132
hl_engine_data_sprintf(e, nic_fmt, port,
8133
is_eng_idle ? "Y" : "N",
8134
qm_glbl_sts0, qm_cgm_sts);
8135
}
8136
}
8137
8138
if (e)
8139
hl_engine_data_sprintf(e, "\n");
8140
8141
return is_idle;
8142
}
8143
8144
static void gaudi_hw_queues_lock(struct hl_device *hdev)
8145
__acquires(&gaudi->hw_queues_lock)
8146
{
8147
struct gaudi_device *gaudi = hdev->asic_specific;
8148
8149
spin_lock(&gaudi->hw_queues_lock);
8150
}
8151
8152
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8153
__releases(&gaudi->hw_queues_lock)
8154
{
8155
struct gaudi_device *gaudi = hdev->asic_specific;
8156
8157
spin_unlock(&gaudi->hw_queues_lock);
8158
}
8159
8160
static u32 gaudi_get_pci_id(struct hl_device *hdev)
8161
{
8162
return hdev->pdev->device;
8163
}
8164
8165
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8166
size_t max_size)
8167
{
8168
struct gaudi_device *gaudi = hdev->asic_specific;
8169
8170
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8171
return 0;
8172
8173
return hl_fw_get_eeprom_data(hdev, data, max_size);
8174
}
8175
8176
static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8177
{
8178
struct gaudi_device *gaudi = hdev->asic_specific;
8179
8180
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8181
return 0;
8182
8183
return hl_fw_get_monitor_dump(hdev, data);
8184
}
8185
8186
/*
* this function should be used only during initialization and/or after reset,
* when there are no active users.
*/
8190
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8191
{
8192
u64 kernel_timeout;
8193
u32 status, offset;
8194
int rc;
8195
8196
offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8197
8198
if (hdev->pldm)
8199
kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8200
else
8201
kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8202
8203
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8204
lower_32_bits(tpc_kernel));
8205
WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8206
upper_32_bits(tpc_kernel));
8207
8208
WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8209
lower_32_bits(tpc_kernel));
8210
WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8211
upper_32_bits(tpc_kernel));
8212
/* set a valid LUT pointer, content is of no significance */
8213
WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8214
lower_32_bits(tpc_kernel));
8215
WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8216
upper_32_bits(tpc_kernel));
8217
8218
WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8219
lower_32_bits(CFG_BASE +
8220
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8221
8222
WREG32(mmTPC0_CFG_TPC_CMD + offset,
8223
(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8224
1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8225
/* wait a bit for the engine to start executing */
8226
usleep_range(1000, 1500);
8227
8228
/* wait until engine has finished executing */
8229
rc = hl_poll_timeout(
8230
hdev,
8231
mmTPC0_CFG_STATUS + offset,
8232
status,
8233
(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8234
TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8235
1000,
8236
kernel_timeout);
8237
8238
if (rc) {
8239
dev_err(hdev->dev,
8240
"Timeout while waiting for TPC%d icache prefetch\n",
8241
tpc_id);
8242
return -EIO;
8243
}
8244
8245
WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8246
1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8247
8248
/* wait a bit for the engine to start executing */
8249
usleep_range(1000, 1500);
8250
8251
/* wait until engine has finished executing */
8252
rc = hl_poll_timeout(
8253
hdev,
8254
mmTPC0_CFG_STATUS + offset,
8255
status,
8256
(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8257
TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8258
1000,
8259
kernel_timeout);
8260
8261
if (rc) {
8262
dev_err(hdev->dev,
8263
"Timeout while waiting for TPC%d vector pipe\n",
8264
tpc_id);
8265
return -EIO;
8266
}
8267
8268
rc = hl_poll_timeout(
8269
hdev,
8270
mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8271
status,
8272
(status == 0),
8273
1000,
8274
kernel_timeout);
8275
8276
if (rc) {
8277
dev_err(hdev->dev,
8278
"Timeout while waiting for TPC%d kernel to execute\n",
8279
tpc_id);
8280
return -EIO;
8281
}
8282
8283
return 0;
8284
}
8285
8286
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8287
struct hl_ctx *ctx)
8288
{
8289
struct gaudi_device *gaudi = hdev->asic_specific;
8290
int min_alloc_order, rc, collective_cb_size;
8291
8292
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8293
return 0;
8294
8295
hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8296
HOST_SPACE_INTERNAL_CB_SZ,
8297
&hdev->internal_cb_pool_dma_addr,
8298
GFP_KERNEL | __GFP_ZERO);
8299
8300
if (!hdev->internal_cb_pool_virt_addr)
8301
return -ENOMEM;
8302
8303
collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8304
sizeof(struct packet_fence);
8305
min_alloc_order = ilog2(collective_cb_size);
8306
8307
hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8308
if (!hdev->internal_cb_pool) {
8309
dev_err(hdev->dev,
8310
"Failed to create internal CB pool\n");
8311
rc = -ENOMEM;
8312
goto free_internal_cb_pool;
8313
}
8314
8315
rc = gen_pool_add(hdev->internal_cb_pool,
8316
(uintptr_t) hdev->internal_cb_pool_virt_addr,
8317
HOST_SPACE_INTERNAL_CB_SZ, -1);
8318
if (rc) {
8319
dev_err(hdev->dev,
8320
"Failed to add memory to internal CB pool\n");
8321
rc = -EFAULT;
8322
goto destroy_internal_cb_pool;
8323
}
8324
8325
hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8326
HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8327
HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8328
8329
if (!hdev->internal_cb_va_base) {
8330
rc = -ENOMEM;
8331
goto destroy_internal_cb_pool;
8332
}
8333
8334
mutex_lock(&hdev->mmu_lock);
8335
8336
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8337
hdev->internal_cb_pool_dma_addr,
8338
HOST_SPACE_INTERNAL_CB_SZ);
8339
if (rc)
8340
goto unreserve_internal_cb_pool;
8341
8342
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8343
if (rc)
8344
goto unmap_internal_cb_pool;
8345
8346
mutex_unlock(&hdev->mmu_lock);
8347
8348
return 0;
8349
8350
unmap_internal_cb_pool:
8351
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8352
HOST_SPACE_INTERNAL_CB_SZ);
8353
unreserve_internal_cb_pool:
8354
mutex_unlock(&hdev->mmu_lock);
8355
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8356
HOST_SPACE_INTERNAL_CB_SZ);
8357
destroy_internal_cb_pool:
8358
gen_pool_destroy(hdev->internal_cb_pool);
8359
free_internal_cb_pool:
8360
hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8361
hdev->internal_cb_pool_dma_addr);
8362
8363
return rc;
8364
}
8365
8366
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8367
struct hl_ctx *ctx)
8368
{
8369
struct gaudi_device *gaudi = hdev->asic_specific;
8370
8371
if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8372
return;
8373
8374
mutex_lock(&hdev->mmu_lock);
8375
hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8376
HOST_SPACE_INTERNAL_CB_SZ);
8377
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8378
HOST_SPACE_INTERNAL_CB_SZ);
8379
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8380
mutex_unlock(&hdev->mmu_lock);
8381
8382
gen_pool_destroy(hdev->internal_cb_pool);
8383
8384
hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8385
hdev->internal_cb_pool_dma_addr);
8386
}
8387
8388
static int gaudi_ctx_init(struct hl_ctx *ctx)
8389
{
8390
int rc;
8391
8392
if (ctx->asid == HL_KERNEL_ASID_ID)
8393
return 0;
8394
8395
rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8396
if (rc)
8397
return rc;
8398
8399
rc = gaudi_restore_user_registers(ctx->hdev);
8400
if (rc)
8401
gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8402
8403
return rc;
8404
}
8405
8406
static void gaudi_ctx_fini(struct hl_ctx *ctx)
8407
{
8408
if (ctx->asid == HL_KERNEL_ASID_ID)
8409
return;
8410
8411
gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8412
}
8413
8414
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8415
{
8416
return 0;
8417
}
8418
8419
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8420
{
8421
return gaudi_cq_assignment[cq_idx];
8422
}
8423
8424
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8425
{
8426
return sizeof(struct packet_msg_short) +
8427
sizeof(struct packet_msg_prot) * 2;
8428
}
8429
8430
static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8431
{
8432
return sizeof(struct packet_msg_short) * 4 +
8433
sizeof(struct packet_fence) +
8434
sizeof(struct packet_msg_prot) * 2;
8435
}
8436
8437
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8438
{
8439
return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8440
}
8441
8442
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

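/*
 * Arm a sync manager monitor: a single MSG_SHORT write to the monitor's ARM
 * register binds it to a group of 8 sync objects (group id = sob_base / 8)
 * with the given mask and target value, using GREATER-OR-EQUAL comparison.
 * Returns 0 if the SOB base/mask combination is invalid.
 */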
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

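/*
 * Translate a queue id to the CFG-space address of that queue's
 * CP_FENCE2_RDATA register, which the FENCE packet in a wait CB polls.
 * Each NIC macro hosts two QMANs, so for NIC queues nic_index >> 1 selects
 * the macro and nic_index & 1 selects the QMAN within it.
 */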
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

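/*
 * Program the monitor payload with three MSG_SHORT packets: the low and high
 * 32 bits of the fence register address and the value (1) the sync manager
 * writes there once the monitor fires. The monitor itself is armed separately
 * by gaudi_add_arm_monitor_pkt().
 */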
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

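/*
 * Build a wait CB: three MSG_SHORT packets set up the monitor payload,
 * a fourth arms the monitor on the requested sync objects, and a final
 * FENCE packet blocks the queue until the monitor writes to the queue's
 * fence register. This accounts for the four MSG_SHORT packets and the
 * FENCE packet counted in gaudi_get_wait_cb_size().
 */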
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

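/* Compose the 64-bit device timestamp from the PSOC counter high/low registers */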
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

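/*
 * Translate the user-facing HL_GAUDI_*_PLL index to the firmware PLL index.
 * Unknown indices are rejected with -EINVAL.
 */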
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

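/*
 * Render the list of sync objects a monitor watches into a human-readable,
 * comma-separated string. The monitored SOBs are decoded from the monitor's
 * ARM data: the group id plus every cleared bit in the mask.
 */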
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}


static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}


static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

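/*
 * ASIC-specific callbacks used by the common habanalabs driver core.
 * Registered through gaudi_set_asic_funcs() below.
 */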
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}