Path: drivers/accel/habanalabs/gaudi/gaudi.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
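
/*
 * External (stream master) queue IDs. This array is exposed through
 * hdev->stream_master_qid_arr in gaudi_sw_init() and is used by the common
 * code, e.g. when waiting for multi-CS completion.
 */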
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
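
/*
 * QMAN arbiter error causes. Like the QM error causes above, the strings
 * appear to be indexed by bit position in the corresponding error cause
 * register (the "N/A" entry above marks a reserved bit).
 */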
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
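
/*
 * Queue type per queue ID: the 8 PCI DMA queues are external (PQ and CQ are
 * managed by the driver on the host), the CPU PQ is driver-only, and all
 * other engine queues (HBM DMA, MME, TPC, NIC) are internal, matching the
 * security scheme described at the top of this file.
 */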
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
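
/*
 * Monitors with well-known, fixed roles. Like the SOB names above, these
 * entries are presumably used to annotate objects in the state dump.
 */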
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
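
/*
 * Queue ID to engine ID translation. The CPU PQ has no engine behind it, so
 * it maps to the out-of-range sentinel GAUDI_ENGINE_ID_SIZE. Note that the
 * two MME QMANs map to the master MME engines (0 and 2) only.
 */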
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
				u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
				u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}
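
/* Fill hdev->asic_prop with the fixed (compile-time) Gaudi properties */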
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;
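
	/*
	 * Host (PCI) MMU configuration: 5 hops with the v1.1 shifts/masks and
	 * 4KB pages. The huge-page PMMU and the DRAM MMU below reuse the same
	 * layout with 2MB pages.
	 */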
	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
			CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}
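
/*
 * Move the HBM BAR so that it points to 'addr' in device memory. Returns the
 * previous BAR base on success, or U64_MAX if the iATU cannot be programmed
 * (FW-configured iATU, or a PCI error).
 */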
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
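
/*
 * Early init: set the fixed properties, validate BAR sizes, detect whether
 * the firmware configured the iATU, and bring up PCI access.
 */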
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
			gaudi_sob_group_hw_reset);
}
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
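
/*
 * Bind a collective wait CS to the SOB of its signal CS (or to the
 * encapsulated signals handle) and initialize the master and slave jobs
 * accordingly.
 */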
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depending on each
		 * job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * which is changed by the signal/wait flows, from going
	 * out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}
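
/*
 * Allocate a CB and a job for one participant (master or slave) of a
 * collective wait and queue it on the given queue.
 */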
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
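
/*
 * A collective wait consists of NUMBER_OF_SOBS_IN_GRP + 1 jobs: one on the
 * master queue plus one per slave (the NIC engines and the DMA5/TPC7
 * reduction engine); jobs for NICs that are disabled in hw_cap are skipped.
 */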
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
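
/*
 * Allocate the CPU-accessible memory region, retrying up to
 * GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until an allocation is found whose
 * MSBs (bits 49:39) are constant across the whole range (see the comment
 * inside the function for why this is required).
 */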
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}
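
/*
 * Allocate PQ buffers for all internal queues; the PQ size depends on the
 * engine type (HBM DMA, MME, TPC or NIC).
 */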

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr,
					q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d\n", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}
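
/*
 * Editor's note (not in the original source): offset_in_bar is where a
 * region starts inside its BAR, so a device address within a region maps
 * to a BAR offset as
 *
 *   bar_offset = offset_in_bar + (addr - region_base)
 *
 * e.g. for the CFG region the BAR begins at SPI_FLASH_BASE_ADDR, hence
 * the CFG_BASE - SPI_FLASH_BASE_ADDR term above, while the SRAM and DRAM
 * regions start at offset 0 of their dedicated BARs.
 */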

static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
			hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
			GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
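
/*
 * Editor's illustration (not in the original source): the mapping keeps
 * the legacy vector layout of CQ vectors first, then the CPU EQ, then
 * the NIC vectors. Assuming, for the sake of the example only, that
 * GAUDI_EVENT_QUEUE_MSI_IDX is 8 and NIC_NUMBER_OF_ENGINES is 10:
 *
 *   nr = 3, cpu_eq = false  ->  msi_vec = 3   (completion queue)
 *   nr = 8, cpu_eq = true   ->  msi_vec = 8   (CPU event queue)
 *   nr = 8, cpu_eq = false  ->  msi_vec = 19  (first "new" MSI, placed
 *                                              after the CPU + NIC ones)
 */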

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	rc = gaudi_enable_msi_single(hdev);
	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);
	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
		1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
		1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
		1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
		1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
		1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
		1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
		1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}

static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
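
/*
 * Editor's note (not in the original source): the *_HBM_CRED_EN_0/_1
 * writes enable both the read and the write credit counters on each of
 * the four DMA_IF routers (E_N/E_S/W_N/W_S). The per-channel values
 * programmed just above appear to be packed per-requestor credit
 * allocations, which is presumably why HBM0 and HBM1 receive different
 * distributions (0x33333333/0x77777777 vs 0x55555555/0xDDDDDDDD).
 */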

static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}

static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
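
/*
 * Editor's illustration (not in the original source): each QMAN's
 * per-stream registers are laid out 4 bytes apart, so with
 *
 *   dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
 *   q_off = dma_qm_offset + qman_id * 4;
 *
 * e.g. DMA1 stream 2 programs mmDMA0_QM_PQ_BASE_LO_0 + DMA_QMAN_OFFSET + 8,
 * i.e. the PQ_BASE_LO_2 instance of the DMA1 QMAN block. The error and
 * arbitration registers use dma_qm_offset alone because they exist once
 * per QMAN, which is why they are written only for qman_id == 0.
 */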

static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion for the operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}

static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}

static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q, we need to add 1 to get the
		 * correct queue. In addition, we need to add the CPU EQ and
		 * NIC IRQs in order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
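
/*
 * Editor's illustration (not in the original source): with the current
 * assignment the two PCI DMA engines are DMA0 and DMA1, so
 * q_idx = 4 * dma_id + j + cpu_skip gives queues 0..3 for DMA0 and 4..7
 * for DMA1 (cpu_skip is 0 for both), matching GAUDI_QUEUE_ID_DMA_0_0
 * through GAUDI_QUEUE_ID_DMA_1_3. The dma_id > 1 branch accounts for the
 * CPU queue and the NIC IRQ vectors in case a PCI DMA engine ever sits
 * after the CPU queue in the queue enumeration.
 */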

static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add the CPU queue in order to get the correct queue
			 * number, as all internal queues are placed after it
			 */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
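
/*
 * Editor's illustration (not in the original source):
 * internal_q_index = dma_id * QMAN_STREAMS + j + 1 skips the CPU queue
 * that sits right after the eight PCI DMA queues in the enumeration,
 * e.g. for the first HBM DMA engine (dma_id = 2), stream 0, this yields
 * index 9, i.e. GAUDI_QUEUE_ID_DMA_2_0. The lower CP (qman_id 4) is
 * initialized with a zero PQ base since it has no PQ of its own.
 */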

static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}

static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
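
/*
 * Editor's illustration (not in the original source): the loop above
 * programs GAUDI_QUEUE_ID_MME_0_* against
 * mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0 and flips to
 * offset 0 after the fourth stream, so MME_0_X lands on the north-west
 * QMAN (mmMME2_QM) and MME_1_X on the south-west one (mmMME0_QM),
 * exactly as the mapping comment in gaudi_init_mme_qmans() describes.
 */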

static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
					QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
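
/*
 * Editor's note (not in the original source): HW_CAP_TPC_MASK is a
 * multi-bit field within hw_cap_initialized, so
 * FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id) sets one bit per enabled TPC
 * inside that field; e.g. tpc_id = 3 ORs in the field's fourth bit.
 * The stop/disable helpers later test hw_cap_initialized against
 * HW_CAP_TPC_MASK as a whole, i.e. "any TPC enabled".
 */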

static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}

static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
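
/*
 * Editor's illustration (not in the original source): each NIC macro
 * hosts two QMANs, so the offset walk advances by one QMAN delta per
 * port and, after every odd port, rewinds two QMAN deltas and jumps one
 * NIC delta. Starting from 0 (Q = QMAN delta, N = NIC delta):
 *
 *   nic_id 0 -> offset 0        nic_id 1 -> offset Q
 *   nic_id 2 -> offset N        nic_id 3 -> offset N + Q
 *   nic_id 4 -> offset 2N       and so on
 *
 * Disabled ports perform the same bookkeeping, so enabled ports still
 * land on their physical QMAN blocks.
 */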

static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}

static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}

static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
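
/*
 * Editor's note (not in the original source): the CP_STOP field appears
 * to carry one bit per command processor. The PCI DMA QMANs stop only
 * their four upper CPs (0xF), matching the "upper CPs" comment above,
 * while the HBM DMA (and, below, the MME/TPC) QMANs stop all five CPs
 * (0x1F), lower CP included.
 */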

static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}

static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}
DMA0_CORE_CFG_1_HALT_SHIFT);3506WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3507WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);3508}35093510static void gaudi_mme_stall(struct hl_device *hdev)3511{3512struct gaudi_device *gaudi = hdev->asic_specific;35133514if (!(gaudi->hw_cap_initialized & HW_CAP_MME))3515return;35163517/* WA for H3-1800 bug: do ACC and SBAB writes twice */3518WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3519WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3520WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3521WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3522WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3523WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3524WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3525WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3526WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3527WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3528WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3529WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3530WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3531WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);3532WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3533WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);3534}35353536static void gaudi_tpc_stall(struct hl_device *hdev)3537{3538struct gaudi_device *gaudi = hdev->asic_specific;35393540if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))3541return;35423543WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3544WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3545WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3546WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3547WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3548WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3549WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3550WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);3551}35523553static void gaudi_disable_clock_gating(struct hl_device *hdev)3554{3555u32 qman_offset;3556int i;35573558if (hdev->asic_prop.fw_security_enabled)3559return;35603561for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {3562WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);3563WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);35643565qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);3566}35673568WREG32(mmMME0_QM_CGM_CFG, 0);3569WREG32(mmMME0_QM_CGM_CFG1, 0);3570WREG32(mmMME2_QM_CGM_CFG, 0);3571WREG32(mmMME2_QM_CGM_CFG1, 0);35723573for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {3574WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);3575WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);35763577qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);3578}3579}35803581static void gaudi_enable_timestamp(struct hl_device *hdev)3582{3583/* Disable the timestamp counter */3584WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);35853586/* Zero the lower/upper parts of the 64-bit counter */3587WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);3588WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);35893590/* Enable the counter */3591WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);3592}35933594static void gaudi_disable_timestamp(struct hl_device *hdev)3595{3596/* Disable the timestamp counter */3597WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);3598}35993600static void 
gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)3601{3602u32 wait_timeout_ms;36033604if (hdev->pldm)3605wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;3606else3607wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;36083609if (fw_reset)3610goto skip_engines;36113612gaudi_stop_nic_qmans(hdev);3613gaudi_stop_mme_qmans(hdev);3614gaudi_stop_tpc_qmans(hdev);3615gaudi_stop_hbm_dma_qmans(hdev);3616gaudi_stop_pci_dma_qmans(hdev);36173618msleep(wait_timeout_ms);36193620gaudi_pci_dma_stall(hdev);3621gaudi_hbm_dma_stall(hdev);3622gaudi_tpc_stall(hdev);3623gaudi_mme_stall(hdev);36243625msleep(wait_timeout_ms);36263627gaudi_disable_nic_qmans(hdev);3628gaudi_disable_mme_qmans(hdev);3629gaudi_disable_tpc_qmans(hdev);3630gaudi_disable_hbm_dma_qmans(hdev);3631gaudi_disable_pci_dma_qmans(hdev);36323633gaudi_disable_timestamp(hdev);36343635skip_engines:3636gaudi_disable_msi(hdev);3637}36383639static int gaudi_mmu_init(struct hl_device *hdev)3640{3641struct asic_fixed_properties *prop = &hdev->asic_prop;3642struct gaudi_device *gaudi = hdev->asic_specific;3643u64 hop0_addr;3644int rc, i;36453646if (gaudi->hw_cap_initialized & HW_CAP_MMU)3647return 0;36483649for (i = 0 ; i < prop->max_asid ; i++) {3650hop0_addr = prop->mmu_pgt_addr +3651(i * prop->dmmu.hop_table_size);36523653rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);3654if (rc) {3655dev_err(hdev->dev,3656"failed to set hop0 addr for asid %d\n", i);3657return rc;3658}3659}36603661/* init MMU cache manage page */3662WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);3663WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);36643665/* mem cache invalidation */3666WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);36673668rc = hl_mmu_invalidate_cache(hdev, true, 0);3669if (rc)3670return rc;36713672WREG32(mmMMU_UP_MMU_ENABLE, 1);3673WREG32(mmMMU_UP_SPI_MASK, 0xF);36743675WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);36763677/*3678* The H/W expects the first PI after init to be 1. 
After wraparound3679* we'll write 0.3680*/3681gaudi->mmu_cache_inv_pi = 1;36823683gaudi->hw_cap_initialized |= HW_CAP_MMU;36843685return 0;3686}36873688static int gaudi_load_firmware_to_device(struct hl_device *hdev)3689{3690void __iomem *dst;36913692dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;36933694return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);3695}36963697static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)3698{3699void __iomem *dst;37003701dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;37023703return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);3704}37053706static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)3707{3708struct dynamic_fw_load_mgr *dynamic_loader;3709struct cpu_dyn_regs *dyn_regs;37103711dynamic_loader = &hdev->fw_loader.dynamic_loader;37123713/*3714* here we update initial values for few specific dynamic regs (as3715* before reading the first descriptor from FW those value has to be3716* hard-coded) in later stages of the protocol those values will be3717* updated automatically by reading the FW descriptor so data there3718* will always be up-to-date3719*/3720dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;3721dyn_regs->kmd_msg_to_cpu =3722cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);3723dyn_regs->cpu_cmd_status_to_host =3724cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);37253726dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;3727}37283729static void gaudi_init_static_firmware_loader(struct hl_device *hdev)3730{3731struct static_fw_load_mgr *static_loader;37323733static_loader = &hdev->fw_loader.static_loader;37343735static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;3736static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;3737static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;3738static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;3739static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;3740static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;3741static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;3742static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;3743static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;3744static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;3745static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;3746static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));3747static_loader->cpu_reset_wait_msec = hdev->pldm ?3748GAUDI_PLDM_RESET_WAIT_MSEC :3749GAUDI_CPU_RESET_WAIT_MSEC;3750}37513752static void gaudi_init_firmware_preload_params(struct hl_device *hdev)3753{3754struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;37553756pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;3757pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;3758pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;3759pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;3760pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;3761pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;3762}37633764static void gaudi_init_firmware_loader(struct hl_device *hdev)3765{3766struct asic_fixed_properties *prop = &hdev->asic_prop;3767struct fw_load_mgr *fw_loader = &hdev->fw_loader;37683769/* fill common fields */3770fw_loader->fw_comp_loaded = FW_TYPE_NONE;3771fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;3772fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;3773fw_loader->cpu_timeout = 
GAUDI_CPU_TIMEOUT_USEC;3774fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;3775fw_loader->skip_bmc = !hdev->bmc_enable;3776fw_loader->sram_bar_id = SRAM_BAR_ID;3777fw_loader->dram_bar_id = HBM_BAR_ID;37783779if (prop->dynamic_fw_load)3780gaudi_init_dynamic_firmware_loader(hdev);3781else3782gaudi_init_static_firmware_loader(hdev);3783}37843785static int gaudi_init_cpu(struct hl_device *hdev)3786{3787struct gaudi_device *gaudi = hdev->asic_specific;3788int rc;37893790if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))3791return 0;37923793if (gaudi->hw_cap_initialized & HW_CAP_CPU)3794return 0;37953796/*3797* The device CPU works with 40 bits addresses.3798* This register sets the extension to 50 bits.3799*/3800if (!hdev->asic_prop.fw_security_enabled)3801WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);38023803rc = hl_fw_init_cpu(hdev);38043805if (rc)3806return rc;38073808gaudi->hw_cap_initialized |= HW_CAP_CPU;38093810return 0;3811}38123813static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)3814{3815struct cpu_dyn_regs *dyn_regs =3816&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;3817struct asic_fixed_properties *prop = &hdev->asic_prop;3818struct gaudi_device *gaudi = hdev->asic_specific;3819u32 status, irq_handler_offset;3820struct hl_eq *eq;3821struct hl_hw_queue *cpu_pq =3822&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];3823int err;38243825if (!hdev->cpu_queues_enable)3826return 0;38273828if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)3829return 0;38303831eq = &hdev->event_queue;38323833WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));3834WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));38353836WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));3837WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));38383839WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,3840lower_32_bits(hdev->cpu_accessible_dma_address));3841WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,3842upper_32_bits(hdev->cpu_accessible_dma_address));38433844WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);3845WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);3846WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);38473848/* Used for EQ CI */3849WREG32(mmCPU_IF_EQ_RD_OFFS, 0);38503851WREG32(mmCPU_IF_PF_PQ_PI, 0);38523853WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);38543855irq_handler_offset = prop->gic_interrupts_enable ?3856mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :3857le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);38583859WREG32(irq_handler_offset,3860gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);38613862err = hl_poll_timeout(3863hdev,3864mmCPU_IF_QUEUE_INIT,3865status,3866(status == PQ_INIT_STATUS_READY_FOR_HOST),38671000,3868cpu_timeout);38693870if (err) {3871dev_err(hdev->dev,3872"Failed to communicate with Device CPU (CPU-CP timeout)\n");3873return -EIO;3874}38753876/* update FW application security bits */3877if (prop->fw_cpu_boot_dev_sts0_valid)3878prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);3879if (prop->fw_cpu_boot_dev_sts1_valid)3880prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);38813882gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;3883return 0;3884}38853886static void gaudi_pre_hw_init(struct hl_device *hdev)3887{3888/* Perform read from the device to make sure device is up */3889RREG32(mmHW_STATE);38903891if (!hdev->asic_prop.fw_security_enabled) {3892/* Set the access through PCI bars (Linux driver only) as3893* 
secured3894*/3895WREG32(mmPCIE_WRAP_LBW_PROT_OVR,3896(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |3897PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));38983899/* Perform read to flush the waiting writes to ensure3900* configuration was set in the device3901*/3902RREG32(mmPCIE_WRAP_LBW_PROT_OVR);3903}39043905/*3906* Let's mark in the H/W that we have reached this point. We check3907* this value in the reset_before_init function to understand whether3908* we need to reset the chip before doing H/W init. This register is3909* cleared by the H/W upon H/W reset3910*/3911WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);3912}39133914static int gaudi_hw_init(struct hl_device *hdev)3915{3916struct gaudi_device *gaudi = hdev->asic_specific;3917int rc;39183919gaudi_pre_hw_init(hdev);39203921/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.3922* So we set it here and if anyone tries to move it later to3923* a different address, there will be an error3924*/3925if (hdev->asic_prop.iatu_done_by_fw)3926gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;39273928/*3929* Before pushing u-boot/linux to device, need to set the hbm bar to3930* base address of dram3931*/3932if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {3933dev_err(hdev->dev,3934"failed to map HBM bar to DRAM base address\n");3935return -EIO;3936}39373938rc = gaudi_init_cpu(hdev);3939if (rc) {3940dev_err(hdev->dev, "failed to initialize CPU\n");3941return rc;3942}39433944/* In case the clock gating was enabled in preboot we need to disable3945* it here before touching the MME/TPC registers.3946*/3947gaudi_disable_clock_gating(hdev);39483949/* SRAM scrambler must be initialized after CPU is running from HBM */3950gaudi_init_scrambler_sram(hdev);39513952/* This is here just in case we are working without CPU */3953gaudi_init_scrambler_hbm(hdev);39543955gaudi_init_golden_registers(hdev);39563957rc = gaudi_mmu_init(hdev);3958if (rc)3959return rc;39603961gaudi_init_security(hdev);39623963gaudi_init_pci_dma_qmans(hdev);39643965gaudi_init_hbm_dma_qmans(hdev);39663967gaudi_init_mme_qmans(hdev);39683969gaudi_init_tpc_qmans(hdev);39703971gaudi_init_nic_qmans(hdev);39723973gaudi_enable_timestamp(hdev);39743975/* MSI must be enabled before CPU queues and NIC are initialized */3976rc = gaudi_enable_msi(hdev);3977if (rc)3978goto disable_queues;39793980/* must be called after MSI was enabled */3981rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);3982if (rc) {3983dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",3984rc);3985goto disable_msi;3986}39873988/* Perform read from the device to flush all configuration */3989RREG32(mmHW_STATE);39903991return 0;39923993disable_msi:3994gaudi_disable_msi(hdev);3995disable_queues:3996gaudi_disable_mme_qmans(hdev);3997gaudi_disable_pci_dma_qmans(hdev);39983999return rc;4000}40014002static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)4003{4004struct cpu_dyn_regs *dyn_regs =4005&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;4006u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;4007struct gaudi_device *gaudi = hdev->asic_specific;4008bool driver_performs_reset;40094010if (!hard_reset) {4011dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");4012return 0;4013}40144015if (hdev->pldm) {4016reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;4017cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;4018} else {4019reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;4020cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;4021}40224023if (fw_reset) {4024dev_dbg(hdev->dev,4025"Firmware 
performs HARD reset, going to wait %dms\n",4026reset_timeout_ms);40274028goto skip_reset;4029}40304031driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&4032!hdev->asic_prop.hard_reset_done_by_fw);40334034/* Set device to handle FLR by H/W as we will put the device CPU to4035* halt mode4036*/4037if (driver_performs_reset)4038WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |4039PCIE_AUX_FLR_CTRL_INT_MASK_MASK));40404041/* If linux is loaded in the device CPU we need to communicate with it4042* via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU4043* registers in case of old F/Ws4044*/4045if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {4046irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?4047mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :4048le32_to_cpu(dyn_regs->gic_host_halt_irq);40494050WREG32(irq_handler_offset,4051gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);40524053/* This is a hail-mary attempt to revive the card in the small chance that the4054* f/w has experienced a watchdog event, which caused it to return back to preboot.4055* In that case, triggering reset through GIC won't help. We need to trigger the4056* reset as if Linux wasn't loaded.4057*4058* We do it only if the reset cause was HB, because that would be the indication4059* of such an event.4060*4061* In case watchdog hasn't expired but we still got HB, then this won't do any4062* damage.4063*/4064if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {4065if (hdev->asic_prop.hard_reset_done_by_fw)4066hl_fw_ask_hard_reset_without_linux(hdev);4067else4068hl_fw_ask_halt_machine_without_linux(hdev);4069}4070} else {4071if (hdev->asic_prop.hard_reset_done_by_fw)4072hl_fw_ask_hard_reset_without_linux(hdev);4073else4074hl_fw_ask_halt_machine_without_linux(hdev);4075}40764077if (driver_performs_reset) {40784079/* Configure the reset registers. Must be done as early as4080* possible in case we fail during H/W initialization4081*/4082WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,4083(CFG_RST_H_DMA_MASK |4084CFG_RST_H_MME_MASK |4085CFG_RST_H_SM_MASK |4086CFG_RST_H_TPC_7_MASK));40874088WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);40894090WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,4091(CFG_RST_H_HBM_MASK |4092CFG_RST_H_TPC_7_MASK |4093CFG_RST_H_NIC_MASK |4094CFG_RST_H_SM_MASK |4095CFG_RST_H_DMA_MASK |4096CFG_RST_H_MME_MASK |4097CFG_RST_H_CPU_MASK |4098CFG_RST_H_MMU_MASK));40994100WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,4101(CFG_RST_L_IF_MASK |4102CFG_RST_L_PSOC_MASK |4103CFG_RST_L_TPC_MASK));41044105msleep(cpu_timeout_ms);41064107/* Tell ASIC not to re-initialize PCIe */4108WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);41094110/* Restart BTL/BLR upon hard-reset */4111WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);41124113WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,41141 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);41154116dev_dbg(hdev->dev,4117"Issued HARD reset command, going to wait %dms\n",4118reset_timeout_ms);4119} else {4120dev_dbg(hdev->dev,4121"Firmware performs HARD reset, going to wait %dms\n",4122reset_timeout_ms);4123}41244125skip_reset:4126/*4127* After hard reset, we can't poll the BTM_FSM register because the PSOC4128* itself is in reset. 
Need to wait until the reset is deasserted4129*/4130msleep(reset_timeout_ms);41314132status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);4133if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {4134dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);4135return -ETIMEDOUT;4136}41374138if (gaudi) {4139gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |4140HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |4141HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |4142HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |4143HW_CAP_HBM_SCRAMBLER);41444145memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));41464147hdev->device_cpu_is_halted = false;4148}4149return 0;4150}41514152static int gaudi_suspend(struct hl_device *hdev)4153{4154return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);4155}41564157static int gaudi_resume(struct hl_device *hdev)4158{4159return gaudi_init_iatu(hdev);4160}41614162static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,4163void *cpu_addr, dma_addr_t dma_addr, size_t size)4164{4165int rc;41664167vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |4168VM_DONTCOPY | VM_NORESERVE);41694170rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,4171(dma_addr - HOST_PHYS_BASE), size);4172if (rc)4173dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);41744175return rc;4176}41774178static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)4179{4180struct cpu_dyn_regs *dyn_regs =4181&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;4182u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;4183struct gaudi_device *gaudi = hdev->asic_specific;4184bool invalid_queue = false;4185int dma_id;41864187switch (hw_queue_id) {4188case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:4189dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];4190dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4191q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;4192db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4193break;41944195case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:4196dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];4197dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4198q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;4199db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4200break;42014202case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:4203dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];4204dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4205q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4206db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4207break;42084209case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:4210dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];4211dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4212q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4213db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4214break;42154216case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:4217dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];4218dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4219q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4220db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4221break;42224223case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:4224dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];4225dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4226q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4227db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4228break;42294230case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:4231dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];4232dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4233q_off = 
dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4234db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4235break;42364237case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:4238dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];4239dma_qm_offset = dma_id * DMA_QMAN_OFFSET;4240q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;4241db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;4242break;42434244case GAUDI_QUEUE_ID_CPU_PQ:4245if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)4246db_reg_offset = mmCPU_IF_PF_PQ_PI;4247else4248invalid_queue = true;4249break;42504251case GAUDI_QUEUE_ID_MME_0_0:4252db_reg_offset = mmMME2_QM_PQ_PI_0;4253break;42544255case GAUDI_QUEUE_ID_MME_0_1:4256db_reg_offset = mmMME2_QM_PQ_PI_1;4257break;42584259case GAUDI_QUEUE_ID_MME_0_2:4260db_reg_offset = mmMME2_QM_PQ_PI_2;4261break;42624263case GAUDI_QUEUE_ID_MME_0_3:4264db_reg_offset = mmMME2_QM_PQ_PI_3;4265break;42664267case GAUDI_QUEUE_ID_MME_1_0:4268db_reg_offset = mmMME0_QM_PQ_PI_0;4269break;42704271case GAUDI_QUEUE_ID_MME_1_1:4272db_reg_offset = mmMME0_QM_PQ_PI_1;4273break;42744275case GAUDI_QUEUE_ID_MME_1_2:4276db_reg_offset = mmMME0_QM_PQ_PI_2;4277break;42784279case GAUDI_QUEUE_ID_MME_1_3:4280db_reg_offset = mmMME0_QM_PQ_PI_3;4281break;42824283case GAUDI_QUEUE_ID_TPC_0_0:4284db_reg_offset = mmTPC0_QM_PQ_PI_0;4285break;42864287case GAUDI_QUEUE_ID_TPC_0_1:4288db_reg_offset = mmTPC0_QM_PQ_PI_1;4289break;42904291case GAUDI_QUEUE_ID_TPC_0_2:4292db_reg_offset = mmTPC0_QM_PQ_PI_2;4293break;42944295case GAUDI_QUEUE_ID_TPC_0_3:4296db_reg_offset = mmTPC0_QM_PQ_PI_3;4297break;42984299case GAUDI_QUEUE_ID_TPC_1_0:4300db_reg_offset = mmTPC1_QM_PQ_PI_0;4301break;43024303case GAUDI_QUEUE_ID_TPC_1_1:4304db_reg_offset = mmTPC1_QM_PQ_PI_1;4305break;43064307case GAUDI_QUEUE_ID_TPC_1_2:4308db_reg_offset = mmTPC1_QM_PQ_PI_2;4309break;43104311case GAUDI_QUEUE_ID_TPC_1_3:4312db_reg_offset = mmTPC1_QM_PQ_PI_3;4313break;43144315case GAUDI_QUEUE_ID_TPC_2_0:4316db_reg_offset = mmTPC2_QM_PQ_PI_0;4317break;43184319case GAUDI_QUEUE_ID_TPC_2_1:4320db_reg_offset = mmTPC2_QM_PQ_PI_1;4321break;43224323case GAUDI_QUEUE_ID_TPC_2_2:4324db_reg_offset = mmTPC2_QM_PQ_PI_2;4325break;43264327case GAUDI_QUEUE_ID_TPC_2_3:4328db_reg_offset = mmTPC2_QM_PQ_PI_3;4329break;43304331case GAUDI_QUEUE_ID_TPC_3_0:4332db_reg_offset = mmTPC3_QM_PQ_PI_0;4333break;43344335case GAUDI_QUEUE_ID_TPC_3_1:4336db_reg_offset = mmTPC3_QM_PQ_PI_1;4337break;43384339case GAUDI_QUEUE_ID_TPC_3_2:4340db_reg_offset = mmTPC3_QM_PQ_PI_2;4341break;43424343case GAUDI_QUEUE_ID_TPC_3_3:4344db_reg_offset = mmTPC3_QM_PQ_PI_3;4345break;43464347case GAUDI_QUEUE_ID_TPC_4_0:4348db_reg_offset = mmTPC4_QM_PQ_PI_0;4349break;43504351case GAUDI_QUEUE_ID_TPC_4_1:4352db_reg_offset = mmTPC4_QM_PQ_PI_1;4353break;43544355case GAUDI_QUEUE_ID_TPC_4_2:4356db_reg_offset = mmTPC4_QM_PQ_PI_2;4357break;43584359case GAUDI_QUEUE_ID_TPC_4_3:4360db_reg_offset = mmTPC4_QM_PQ_PI_3;4361break;43624363case GAUDI_QUEUE_ID_TPC_5_0:4364db_reg_offset = mmTPC5_QM_PQ_PI_0;4365break;43664367case GAUDI_QUEUE_ID_TPC_5_1:4368db_reg_offset = mmTPC5_QM_PQ_PI_1;4369break;43704371case GAUDI_QUEUE_ID_TPC_5_2:4372db_reg_offset = mmTPC5_QM_PQ_PI_2;4373break;43744375case GAUDI_QUEUE_ID_TPC_5_3:4376db_reg_offset = mmTPC5_QM_PQ_PI_3;4377break;43784379case GAUDI_QUEUE_ID_TPC_6_0:4380db_reg_offset = mmTPC6_QM_PQ_PI_0;4381break;43824383case GAUDI_QUEUE_ID_TPC_6_1:4384db_reg_offset = mmTPC6_QM_PQ_PI_1;4385break;43864387case GAUDI_QUEUE_ID_TPC_6_2:4388db_reg_offset = mmTPC6_QM_PQ_PI_2;4389break;43904391case GAUDI_QUEUE_ID_TPC_6_3:4392db_reg_offset = 
mmTPC6_QM_PQ_PI_3;4393break;43944395case GAUDI_QUEUE_ID_TPC_7_0:4396db_reg_offset = mmTPC7_QM_PQ_PI_0;4397break;43984399case GAUDI_QUEUE_ID_TPC_7_1:4400db_reg_offset = mmTPC7_QM_PQ_PI_1;4401break;44024403case GAUDI_QUEUE_ID_TPC_7_2:4404db_reg_offset = mmTPC7_QM_PQ_PI_2;4405break;44064407case GAUDI_QUEUE_ID_TPC_7_3:4408db_reg_offset = mmTPC7_QM_PQ_PI_3;4409break;44104411case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:4412if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))4413invalid_queue = true;44144415q_off = ((hw_queue_id - 1) & 0x3) * 4;4416db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;4417break;44184419case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:4420if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))4421invalid_queue = true;44224423q_off = ((hw_queue_id - 1) & 0x3) * 4;4424db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;4425break;44264427case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:4428if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))4429invalid_queue = true;44304431q_off = ((hw_queue_id - 1) & 0x3) * 4;4432db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;4433break;44344435case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:4436if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))4437invalid_queue = true;44384439q_off = ((hw_queue_id - 1) & 0x3) * 4;4440db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;4441break;44424443case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:4444if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))4445invalid_queue = true;44464447q_off = ((hw_queue_id - 1) & 0x3) * 4;4448db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;4449break;44504451case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:4452if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))4453invalid_queue = true;44544455q_off = ((hw_queue_id - 1) & 0x3) * 4;4456db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;4457break;44584459case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:4460if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))4461invalid_queue = true;44624463q_off = ((hw_queue_id - 1) & 0x3) * 4;4464db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;4465break;44664467case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:4468if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))4469invalid_queue = true;44704471q_off = ((hw_queue_id - 1) & 0x3) * 4;4472db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;4473break;44744475case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:4476if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))4477invalid_queue = true;44784479q_off = ((hw_queue_id - 1) & 0x3) * 4;4480db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;4481break;44824483case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:4484if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))4485invalid_queue = true;44864487q_off = ((hw_queue_id - 1) & 0x3) * 4;4488db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;4489break;44904491default:4492invalid_queue = true;4493}44944495if (invalid_queue) {4496/* Should never get here */4497dev_err(hdev->dev, "h/w queue %d is invalid. 
Can't set pi\n",4498hw_queue_id);4499return;4500}45014502db_value = pi;45034504/* ring the doorbell */4505WREG32(db_reg_offset, db_value);45064507if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {4508/* make sure device CPU will read latest data from host */4509mb();45104511irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?4512mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :4513le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);45144515WREG32(irq_handler_offset,4516gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);4517}4518}45194520static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,4521struct hl_bd *bd)4522{4523__le64 *pbd = (__le64 *) bd;45244525/* The QMANs are on the host memory so a simple copy suffice */4526pqe[0] = pbd[0];4527pqe[1] = pbd[1];4528}45294530static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,4531dma_addr_t *dma_handle, gfp_t flags)4532{4533void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,4534dma_handle, flags);45354536/* Shift to the device's base physical address of host memory */4537if (kernel_addr)4538*dma_handle += HOST_PHYS_BASE;45394540return kernel_addr;4541}45424543static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,4544void *cpu_addr, dma_addr_t dma_handle)4545{4546/* Cancel the device's base physical address of host memory */4547dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;45484549dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);4550}45514552static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)4553{4554struct asic_fixed_properties *prop = &hdev->asic_prop;4555u64 cur_addr = prop->dram_user_base_address;4556u32 chunk_size, busy;4557int rc, dma_id;45584559while (cur_addr < prop->dram_end_address) {4560for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {4561u32 dma_offset = dma_id * DMA_CORE_OFFSET;45624563chunk_size =4564min((u64)SZ_2G, prop->dram_end_address - cur_addr);45654566dev_dbg(hdev->dev,4567"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",4568cur_addr, cur_addr + chunk_size);45694570WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,4571lower_32_bits(val));4572WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,4573upper_32_bits(val));4574WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,4575lower_32_bits(cur_addr));4576WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,4577upper_32_bits(cur_addr));4578WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,4579chunk_size);4580WREG32(mmDMA0_CORE_COMMIT + dma_offset,4581((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |4582(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));45834584cur_addr += chunk_size;45854586if (cur_addr == prop->dram_end_address)4587break;4588}45894590for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {4591u32 dma_offset = dma_id * DMA_CORE_OFFSET;45924593rc = hl_poll_timeout(4594hdev,4595mmDMA0_CORE_STS0 + dma_offset,4596busy,4597((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),45981000,4599HBM_SCRUBBING_TIMEOUT_US);46004601if (rc) {4602dev_err(hdev->dev,4603"DMA Timeout during HBM scrubbing of DMA #%d\n",4604dma_id);4605return -EIO;4606}4607}4608}46094610return 0;4611}46124613static int gaudi_scrub_device_mem(struct hl_device *hdev)4614{4615struct asic_fixed_properties *prop = &hdev->asic_prop;4616u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;4617u64 addr, size, val = hdev->memory_scrub_val;4618ktime_t timeout;4619int rc = 0;46204621if (!hdev->memory_scrub)4622return 0;46234624timeout = ktime_add_us(ktime_get(), wait_to_idle_time);4625while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {4626if 
(ktime_compare(ktime_get(), timeout) > 0) {4627dev_err(hdev->dev, "waiting for idle timeout\n");4628return -ETIMEDOUT;4629}4630usleep_range((1000 >> 2) + 1, 1000);4631}46324633/* Scrub SRAM */4634addr = prop->sram_user_base_address;4635size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;46364637dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",4638addr, addr + size, val);4639rc = gaudi_memset_device_memory(hdev, addr, size, val);4640if (rc) {4641dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);4642return rc;4643}46444645/* Scrub HBM using all DMA channels in parallel */4646rc = gaudi_scrub_device_dram(hdev, val);4647if (rc) {4648dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);4649return rc;4650}46514652return 0;4653}46544655static void *gaudi_get_int_queue_base(struct hl_device *hdev,4656u32 queue_id, dma_addr_t *dma_handle,4657u16 *queue_len)4658{4659struct gaudi_device *gaudi = hdev->asic_specific;4660struct gaudi_internal_qman_info *q;46614662if (queue_id >= GAUDI_QUEUE_ID_SIZE ||4663gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {4664dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);4665return NULL;4666}46674668q = &gaudi->internal_qmans[queue_id];4669*dma_handle = q->pq_dma_addr;4670*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;46714672return q->pq_kernel_addr;4673}46744675static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,4676u16 len, u32 timeout, u64 *result)4677{4678struct gaudi_device *gaudi = hdev->asic_specific;46794680if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {4681if (result)4682*result = 0;4683return 0;4684}46854686if (!timeout)4687timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;46884689return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,4690timeout, result);4691}46924693static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)4694{4695struct packet_msg_prot *fence_pkt;4696dma_addr_t pkt_dma_addr;4697u32 fence_val, tmp, timeout_usec;4698dma_addr_t fence_dma_addr;4699u32 *fence_ptr;4700int rc;47014702if (hdev->pldm)4703timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;4704else4705timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;47064707fence_val = GAUDI_QMAN0_FENCE_VAL;47084709fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);4710if (!fence_ptr) {4711dev_err(hdev->dev,4712"Failed to allocate memory for H/W queue %d testing\n",4713hw_queue_id);4714return -ENOMEM;4715}47164717*fence_ptr = 0;47184719fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,4720&pkt_dma_addr);4721if (!fence_pkt) {4722dev_err(hdev->dev,4723"Failed to allocate packet for H/W queue %d testing\n",4724hw_queue_id);4725rc = -ENOMEM;4726goto free_fence_ptr;4727}47284729tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);4730tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);4731tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);47324733fence_pkt->ctl = cpu_to_le32(tmp);4734fence_pkt->value = cpu_to_le32(fence_val);4735fence_pkt->addr = cpu_to_le64(fence_dma_addr);47364737rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,4738sizeof(struct packet_msg_prot),4739pkt_dma_addr);4740if (rc) {4741dev_err(hdev->dev,4742"Failed to send fence packet to H/W queue %d\n",4743hw_queue_id);4744goto free_pkt;4745}47464747rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),47481000, timeout_usec, true);47494750hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);47514752if (rc == -ETIMEDOUT) {4753dev_err(hdev->dev,4754"H/W queue %d test failed (scratch(0x%08llX) == 
0x%08X)\n",4755hw_queue_id, (unsigned long long) fence_dma_addr, tmp);4756rc = -EIO;4757}47584759free_pkt:4760hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);4761free_fence_ptr:4762hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);4763return rc;4764}47654766static int gaudi_test_cpu_queue(struct hl_device *hdev)4767{4768struct gaudi_device *gaudi = hdev->asic_specific;47694770/*4771* check capability here as send_cpu_message() won't update the result4772* value if no capability4773*/4774if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))4775return 0;47764777return hl_fw_test_cpu_queue(hdev);4778}47794780static int gaudi_test_queues(struct hl_device *hdev)4781{4782int i, rc, ret_val = 0;47834784for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {4785if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {4786rc = gaudi_test_queue(hdev, i);4787if (rc)4788ret_val = -EINVAL;4789}4790}47914792rc = gaudi_test_cpu_queue(hdev);4793if (rc)4794ret_val = -EINVAL;47954796return ret_val;4797}47984799static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,4800gfp_t mem_flags, dma_addr_t *dma_handle)4801{4802void *kernel_addr;48034804if (size > GAUDI_DMA_POOL_BLK_SIZE)4805return NULL;48064807kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);48084809/* Shift to the device's base physical address of host memory */4810if (kernel_addr)4811*dma_handle += HOST_PHYS_BASE;48124813return kernel_addr;4814}48154816static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,4817dma_addr_t dma_addr)4818{4819/* Cancel the device's base physical address of host memory */4820dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;48214822dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);4823}48244825static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,4826size_t size, dma_addr_t *dma_handle)4827{4828return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);4829}48304831static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,4832size_t size, void *vaddr)4833{4834hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);4835}48364837static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)4838{4839struct scatterlist *sg, *sg_next_iter;4840u32 count, dma_desc_cnt;4841u64 len, len_next;4842dma_addr_t addr, addr_next;48434844dma_desc_cnt = 0;48454846for_each_sgtable_dma_sg(sgt, sg, count) {4847len = sg_dma_len(sg);4848addr = sg_dma_address(sg);48494850if (len == 0)4851break;48524853while ((count + 1) < sgt->nents) {4854sg_next_iter = sg_next(sg);4855len_next = sg_dma_len(sg_next_iter);4856addr_next = sg_dma_address(sg_next_iter);48574858if (len_next == 0)4859break;48604861if ((addr + len == addr_next) &&4862(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {4863len += len_next;4864count++;4865sg = sg_next_iter;4866} else {4867break;4868}4869}48704871dma_desc_cnt++;4872}48734874return dma_desc_cnt * sizeof(struct packet_lin_dma);4875}48764877static int gaudi_pin_memory_before_cs(struct hl_device *hdev,4878struct hl_cs_parser *parser,4879struct packet_lin_dma *user_dma_pkt,4880u64 addr, enum dma_data_direction dir)4881{4882struct hl_userptr *userptr;4883int rc;48844885if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),4886parser->job_userptr_list, &userptr))4887goto already_pinned;48884889userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);4890if (!userptr)4891return -ENOMEM;48924893rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),4894userptr);4895if 
(rc)4896goto free_userptr;48974898list_add_tail(&userptr->job_node, parser->job_userptr_list);48994900rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);4901if (rc) {4902dev_err(hdev->dev, "failed to map sgt with DMA region\n");4903goto unpin_memory;4904}49054906userptr->dma_mapped = true;4907userptr->dir = dir;49084909already_pinned:4910parser->patched_cb_size +=4911gaudi_get_dma_desc_list_size(hdev, userptr->sgt);49124913return 0;49144915unpin_memory:4916list_del(&userptr->job_node);4917hl_unpin_host_memory(hdev, userptr);4918free_userptr:4919kfree(userptr);4920return rc;4921}49224923static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,4924struct hl_cs_parser *parser,4925struct packet_lin_dma *user_dma_pkt,4926bool src_in_host)4927{4928enum dma_data_direction dir;4929bool skip_host_mem_pin = false, user_memset;4930u64 addr;4931int rc = 0;49324933user_memset = (le32_to_cpu(user_dma_pkt->ctl) &4934GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>4935GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;49364937if (src_in_host) {4938if (user_memset)4939skip_host_mem_pin = true;49404941dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");4942dir = DMA_TO_DEVICE;4943addr = le64_to_cpu(user_dma_pkt->src_addr);4944} else {4945dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");4946dir = DMA_FROM_DEVICE;4947addr = (le64_to_cpu(user_dma_pkt->dst_addr) &4948GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>4949GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;4950}49514952if (skip_host_mem_pin)4953parser->patched_cb_size += sizeof(*user_dma_pkt);4954else4955rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,4956addr, dir);49574958return rc;4959}49604961static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,4962struct hl_cs_parser *parser,4963struct packet_lin_dma *user_dma_pkt)4964{4965bool src_in_host = false;4966u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &4967GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>4968GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;49694970dev_dbg(hdev->dev, "DMA packet details:\n");4971dev_dbg(hdev->dev, "source == 0x%llx\n",4972le64_to_cpu(user_dma_pkt->src_addr));4973dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);4974dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));49754976/*4977* Special handling for DMA with size 0. 
Bypass all validations4978* because no transactions will be done except for WR_COMP, which4979* is not a security issue4980*/4981if (!le32_to_cpu(user_dma_pkt->tsize)) {4982parser->patched_cb_size += sizeof(*user_dma_pkt);4983return 0;4984}49854986if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)4987src_in_host = true;49884989return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,4990src_in_host);4991}49924993static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,4994struct hl_cs_parser *parser,4995struct packet_load_and_exe *user_pkt)4996{4997u32 cfg;49984999cfg = le32_to_cpu(user_pkt->cfg);50005001if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {5002dev_err(hdev->dev,5003"User not allowed to use Load and Execute\n");5004return -EPERM;5005}50065007parser->patched_cb_size += sizeof(struct packet_load_and_exe);50085009return 0;5010}50115012static int gaudi_validate_cb(struct hl_device *hdev,5013struct hl_cs_parser *parser, bool is_mmu)5014{5015u32 cb_parsed_length = 0;5016int rc = 0;50175018parser->patched_cb_size = 0;50195020/* cb_user_size is more than 0 so loop will always be executed */5021while (cb_parsed_length < parser->user_cb_size) {5022enum packet_id pkt_id;5023u16 pkt_size;5024struct gaudi_packet *user_pkt;50255026user_pkt = parser->user_cb->kernel_address + cb_parsed_length;50275028pkt_id = (enum packet_id) (5029(le64_to_cpu(user_pkt->header) &5030PACKET_HEADER_PACKET_ID_MASK) >>5031PACKET_HEADER_PACKET_ID_SHIFT);50325033if (!validate_packet_id(pkt_id)) {5034dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);5035rc = -EINVAL;5036break;5037}50385039pkt_size = gaudi_packet_sizes[pkt_id];5040cb_parsed_length += pkt_size;5041if (cb_parsed_length > parser->user_cb_size) {5042dev_err(hdev->dev,5043"packet 0x%x is out of CB boundary\n", pkt_id);5044rc = -EINVAL;5045break;5046}50475048switch (pkt_id) {5049case PACKET_MSG_PROT:5050dev_err(hdev->dev,5051"User not allowed to use MSG_PROT\n");5052rc = -EPERM;5053break;50545055case PACKET_CP_DMA:5056dev_err(hdev->dev, "User not allowed to use CP_DMA\n");5057rc = -EPERM;5058break;50595060case PACKET_STOP:5061dev_err(hdev->dev, "User not allowed to use STOP\n");5062rc = -EPERM;5063break;50645065case PACKET_WREG_BULK:5066dev_err(hdev->dev,5067"User not allowed to use WREG_BULK\n");5068rc = -EPERM;5069break;50705071case PACKET_LOAD_AND_EXE:5072rc = gaudi_validate_load_and_exe_pkt(hdev, parser,5073(struct packet_load_and_exe *) user_pkt);5074break;50755076case PACKET_LIN_DMA:5077parser->contains_dma_pkt = true;5078if (is_mmu)5079parser->patched_cb_size += pkt_size;5080else5081rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,5082(struct packet_lin_dma *) user_pkt);5083break;50845085case PACKET_WREG_32:5086case PACKET_MSG_LONG:5087case PACKET_MSG_SHORT:5088case PACKET_REPEAT:5089case PACKET_FENCE:5090case PACKET_NOP:5091case PACKET_ARB_POINT:5092parser->patched_cb_size += pkt_size;5093break;50945095default:5096dev_err(hdev->dev, "Invalid packet header 0x%x\n",5097pkt_id);5098rc = -EINVAL;5099break;5100}51015102if (rc)5103break;5104}51055106/*5107* The new CB should have space at the end for two MSG_PROT packets:5108* 1. Optional NOP padding for cacheline alignment5109* 2. A packet that will act as a completion packet5110* 3. 
A packet that will generate MSI interrupt5111*/5112if (parser->completion)5113parser->patched_cb_size += gaudi_get_patched_cb_extra_size(5114parser->patched_cb_size);51155116return rc;5117}51185119static int gaudi_patch_dma_packet(struct hl_device *hdev,5120struct hl_cs_parser *parser,5121struct packet_lin_dma *user_dma_pkt,5122struct packet_lin_dma *new_dma_pkt,5123u32 *new_dma_pkt_size)5124{5125struct hl_userptr *userptr;5126struct scatterlist *sg, *sg_next_iter;5127u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;5128u64 len, len_next;5129dma_addr_t dma_addr, dma_addr_next;5130u64 device_memory_addr, addr;5131enum dma_data_direction dir;5132struct sg_table *sgt;5133bool src_in_host = false;5134bool skip_host_mem_pin = false;5135bool user_memset;51365137ctl = le32_to_cpu(user_dma_pkt->ctl);51385139if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)5140src_in_host = true;51415142user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>5143GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;51445145if (src_in_host) {5146addr = le64_to_cpu(user_dma_pkt->src_addr);5147device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);5148dir = DMA_TO_DEVICE;5149if (user_memset)5150skip_host_mem_pin = true;5151} else {5152addr = le64_to_cpu(user_dma_pkt->dst_addr);5153device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);5154dir = DMA_FROM_DEVICE;5155}51565157if ((!skip_host_mem_pin) &&5158(!hl_userptr_is_pinned(hdev, addr,5159le32_to_cpu(user_dma_pkt->tsize),5160parser->job_userptr_list, &userptr))) {5161dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",5162addr, user_dma_pkt->tsize);5163return -EFAULT;5164}51655166if ((user_memset) && (dir == DMA_TO_DEVICE)) {5167memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));5168*new_dma_pkt_size = sizeof(*user_dma_pkt);5169return 0;5170}51715172user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;51735174sgt = userptr->sgt;5175dma_desc_cnt = 0;51765177for_each_sgtable_dma_sg(sgt, sg, count) {5178len = sg_dma_len(sg);5179dma_addr = sg_dma_address(sg);51805181if (len == 0)5182break;51835184while ((count + 1) < sgt->nents) {5185sg_next_iter = sg_next(sg);5186len_next = sg_dma_len(sg_next_iter);5187dma_addr_next = sg_dma_address(sg_next_iter);51885189if (len_next == 0)5190break;51915192if ((dma_addr + len == dma_addr_next) &&5193(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {5194len += len_next;5195count++;5196sg = sg_next_iter;5197} else {5198break;5199}5200}52015202ctl = le32_to_cpu(user_dma_pkt->ctl);5203if (likely(dma_desc_cnt))5204ctl &= ~GAUDI_PKT_CTL_EB_MASK;5205ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;5206new_dma_pkt->ctl = cpu_to_le32(ctl);5207new_dma_pkt->tsize = cpu_to_le32(len);52085209if (dir == DMA_TO_DEVICE) {5210new_dma_pkt->src_addr = cpu_to_le64(dma_addr);5211new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);5212} else {5213new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);5214new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);5215}52165217if (!user_memset)5218device_memory_addr += len;5219dma_desc_cnt++;5220new_dma_pkt++;5221}52225223if (!dma_desc_cnt) {5224dev_err(hdev->dev,5225"Error of 0 SG entries when patching DMA packet\n");5226return -EFAULT;5227}52285229/* Fix the last dma packet - wrcomp must be as user set it */5230new_dma_pkt--;5231new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);52325233*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);52345235return 0;5236}52375238static int gaudi_patch_cb(struct hl_device *hdev,5239struct hl_cs_parser *parser)5240{5241u32 cb_parsed_length = 0;5242u32 
cb_patched_cur_length = 0;5243int rc = 0;52445245/* cb_user_size is more than 0 so loop will always be executed */5246while (cb_parsed_length < parser->user_cb_size) {5247enum packet_id pkt_id;5248u16 pkt_size;5249u32 new_pkt_size = 0;5250struct gaudi_packet *user_pkt, *kernel_pkt;52515252user_pkt = parser->user_cb->kernel_address + cb_parsed_length;5253kernel_pkt = parser->patched_cb->kernel_address +5254cb_patched_cur_length;52555256pkt_id = (enum packet_id) (5257(le64_to_cpu(user_pkt->header) &5258PACKET_HEADER_PACKET_ID_MASK) >>5259PACKET_HEADER_PACKET_ID_SHIFT);52605261if (!validate_packet_id(pkt_id)) {5262dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);5263rc = -EINVAL;5264break;5265}52665267pkt_size = gaudi_packet_sizes[pkt_id];5268cb_parsed_length += pkt_size;5269if (cb_parsed_length > parser->user_cb_size) {5270dev_err(hdev->dev,5271"packet 0x%x is out of CB boundary\n", pkt_id);5272rc = -EINVAL;5273break;5274}52755276switch (pkt_id) {5277case PACKET_LIN_DMA:5278rc = gaudi_patch_dma_packet(hdev, parser,5279(struct packet_lin_dma *) user_pkt,5280(struct packet_lin_dma *) kernel_pkt,5281&new_pkt_size);5282cb_patched_cur_length += new_pkt_size;5283break;52845285case PACKET_MSG_PROT:5286dev_err(hdev->dev,5287"User not allowed to use MSG_PROT\n");5288rc = -EPERM;5289break;52905291case PACKET_CP_DMA:5292dev_err(hdev->dev, "User not allowed to use CP_DMA\n");5293rc = -EPERM;5294break;52955296case PACKET_STOP:5297dev_err(hdev->dev, "User not allowed to use STOP\n");5298rc = -EPERM;5299break;53005301case PACKET_WREG_32:5302case PACKET_WREG_BULK:5303case PACKET_MSG_LONG:5304case PACKET_MSG_SHORT:5305case PACKET_REPEAT:5306case PACKET_FENCE:5307case PACKET_NOP:5308case PACKET_ARB_POINT:5309case PACKET_LOAD_AND_EXE:5310memcpy(kernel_pkt, user_pkt, pkt_size);5311cb_patched_cur_length += pkt_size;5312break;53135314default:5315dev_err(hdev->dev, "Invalid packet header 0x%x\n",5316pkt_id);5317rc = -EINVAL;5318break;5319}53205321if (rc)5322break;5323}53245325return rc;5326}53275328static int gaudi_parse_cb_mmu(struct hl_device *hdev,5329struct hl_cs_parser *parser)5330{5331u64 handle;5332u32 patched_cb_size;5333struct hl_cb *user_cb;5334int rc;53355336/*5337* The new CB should have space at the end for two MSG_PROT packets:5338* 1. Optional NOP padding for cacheline alignment5339* 2. A packet that will act as a completion packet5340* 3. A packet that will generate MSI interrupt5341*/5342if (parser->completion)5343parser->patched_cb_size = parser->user_cb_size +5344gaudi_get_patched_cb_extra_size(parser->user_cb_size);5345else5346parser->patched_cb_size = parser->user_cb_size;53475348rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,5349parser->patched_cb_size, false, false,5350&handle);53515352if (rc) {5353dev_err(hdev->dev,5354"Failed to allocate patched CB for DMA CS %d\n",5355rc);5356return rc;5357}53585359parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);5360/* hl_cb_get should never fail */5361if (!parser->patched_cb) {5362dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);5363rc = -EFAULT;5364goto out;5365}53665367/*5368* We are protected from overflow because the check5369* "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()5370* in the common code. That check is done only if is_kernel_allocated_cb is true.5371*5372* There is no option to reach here without going through that check because:5373* 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to5374* an external queue.5375* 2. 
For Gaudi, we only parse CBs that were submitted to the external queues.5376*/5377memcpy(parser->patched_cb->kernel_address,5378parser->user_cb->kernel_address,5379parser->user_cb_size);53805381patched_cb_size = parser->patched_cb_size;53825383/* Validate patched CB instead of user CB */5384user_cb = parser->user_cb;5385parser->user_cb = parser->patched_cb;5386rc = gaudi_validate_cb(hdev, parser, true);5387parser->user_cb = user_cb;53885389if (rc) {5390hl_cb_put(parser->patched_cb);5391goto out;5392}53935394if (patched_cb_size != parser->patched_cb_size) {5395dev_err(hdev->dev, "user CB size mismatch\n");5396hl_cb_put(parser->patched_cb);5397rc = -EINVAL;5398goto out;5399}54005401out:5402/*5403* Always call cb destroy here because we still have 1 reference5404* to it by calling cb_get earlier. After the job will be completed,5405* cb_put will release it, but here we want to remove it from the5406* idr5407*/5408hl_cb_destroy(&hdev->kernel_mem_mgr, handle);54095410return rc;5411}54125413static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,5414struct hl_cs_parser *parser)5415{5416u64 handle;5417int rc;54185419rc = gaudi_validate_cb(hdev, parser, false);54205421if (rc)5422goto free_userptr;54235424rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,5425parser->patched_cb_size, false, false,5426&handle);5427if (rc) {5428dev_err(hdev->dev,5429"Failed to allocate patched CB for DMA CS %d\n", rc);5430goto free_userptr;5431}54325433parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);5434/* hl_cb_get should never fail here */5435if (!parser->patched_cb) {5436dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);5437rc = -EFAULT;5438goto out;5439}54405441rc = gaudi_patch_cb(hdev, parser);54425443if (rc)5444hl_cb_put(parser->patched_cb);54455446out:5447/*5448* Always call cb destroy here because we still have 1 reference5449* to it by calling cb_get earlier. 
After the job will be completed,5450* cb_put will release it, but here we want to remove it from the5451* idr5452*/5453hl_cb_destroy(&hdev->kernel_mem_mgr, handle);54545455free_userptr:5456if (rc)5457hl_userptr_delete_list(hdev, parser->job_userptr_list);5458return rc;5459}54605461static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,5462struct hl_cs_parser *parser)5463{5464struct asic_fixed_properties *asic_prop = &hdev->asic_prop;5465struct gaudi_device *gaudi = hdev->asic_specific;5466u32 nic_queue_offset, nic_mask_q_id;54675468if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&5469(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {5470nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;5471nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));54725473if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {5474dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);5475return -EINVAL;5476}5477}54785479/* For internal queue jobs just check if CB address is valid */5480if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5481parser->user_cb_size,5482asic_prop->sram_user_base_address,5483asic_prop->sram_end_address))5484return 0;54855486if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5487parser->user_cb_size,5488asic_prop->dram_user_base_address,5489asic_prop->dram_end_address))5490return 0;54915492/* PMMU and HPMMU addresses are equal, check only one of them */5493if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,5494parser->user_cb_size,5495asic_prop->pmmu.start_addr,5496asic_prop->pmmu.end_addr))5497return 0;54985499dev_err(hdev->dev,5500"CB address 0x%px + 0x%x for internal QMAN is not valid\n",5501parser->user_cb, parser->user_cb_size);55025503return -EFAULT;5504}55055506static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)5507{5508struct gaudi_device *gaudi = hdev->asic_specific;55095510if (parser->queue_type == QUEUE_TYPE_INT)5511return gaudi_parse_cb_no_ext_queue(hdev, parser);55125513if (gaudi->hw_cap_initialized & HW_CAP_MMU)5514return gaudi_parse_cb_mmu(hdev, parser);5515else5516return gaudi_parse_cb_no_mmu(hdev, parser);5517}55185519static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,5520u32 len, u32 original_len, u64 cq_addr, u32 cq_val,5521u32 msi_vec, bool eb)5522{5523struct packet_msg_prot *cq_pkt;5524struct packet_nop *cq_padding;5525u64 msi_addr;5526u32 tmp;55275528cq_padding = kernel_address + original_len;5529cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);55305531while ((void *)cq_padding < (void *)cq_pkt) {5532cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));5533cq_padding++;5534}55355536tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);5537tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);55385539if (eb)5540tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);55415542cq_pkt->ctl = cpu_to_le32(tmp);5543cq_pkt->value = cpu_to_le32(cq_val);5544cq_pkt->addr = cpu_to_le64(cq_addr);55455546cq_pkt++;55475548tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);5549tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);5550cq_pkt->ctl = cpu_to_le32(tmp);5551cq_pkt->value = cpu_to_le32(1);5552msi_addr = hdev->pdev ? 
	while ((void *)cq_padding < (void *)cq_pkt) {
		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
		cq_padding++;
	}

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);
	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}

static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}

static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job)
{5676dev_err(hdev->dev, "Failed to allocate a new job\n");5677rc = -ENOMEM;5678goto release_cb;5679}56805681job->id = 0;5682job->user_cb = cb;5683atomic_inc(&job->user_cb->cs_cnt);5684job->user_cb_size = cb_size;5685job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;5686job->patched_cb = job->user_cb;5687job->job_cb_size = cb_size;56885689hl_debugfs_add_job(hdev, job);56905691rc = gaudi_send_job_on_qman0(hdev, job);5692hl_debugfs_remove_job(hdev, job);5693kfree(job);5694atomic_dec(&cb->cs_cnt);56955696release_cb:5697hl_cb_put(cb);5698hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);56995700return rc;5701}57025703static int gaudi_restore_sm_registers(struct hl_device *hdev)5704{5705u64 base_addr;5706u32 num_regs;5707int rc;57085709base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5710num_regs = NUM_OF_SOB_IN_BLOCK;5711rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5712if (rc) {5713dev_err(hdev->dev, "failed resetting SM registers");5714return -ENOMEM;5715}57165717base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;5718num_regs = NUM_OF_SOB_IN_BLOCK;5719rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5720if (rc) {5721dev_err(hdev->dev, "failed resetting SM registers");5722return -ENOMEM;5723}57245725base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5726num_regs = NUM_OF_SOB_IN_BLOCK;5727rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5728if (rc) {5729dev_err(hdev->dev, "failed resetting SM registers");5730return -ENOMEM;5731}57325733base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;5734num_regs = NUM_OF_MONITORS_IN_BLOCK;5735rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5736if (rc) {5737dev_err(hdev->dev, "failed resetting SM registers");5738return -ENOMEM;5739}57405741base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;5742num_regs = NUM_OF_MONITORS_IN_BLOCK;5743rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5744if (rc) {5745dev_err(hdev->dev, "failed resetting SM registers");5746return -ENOMEM;5747}57485749base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;5750num_regs = NUM_OF_MONITORS_IN_BLOCK;5751rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5752if (rc) {5753dev_err(hdev->dev, "failed resetting SM registers");5754return -ENOMEM;5755}57565757base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +5758(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);5759num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;5760rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5761if (rc) {5762dev_err(hdev->dev, "failed resetting SM registers");5763return -ENOMEM;5764}57655766base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +5767(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);5768num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;5769rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);5770if (rc) {5771dev_err(hdev->dev, "failed resetting SM registers");5772return -ENOMEM;5773}57745775return 0;5776}57775778static void gaudi_restore_dma_registers(struct hl_device *hdev)5779{5780u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -5781mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;5782int i;57835784for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {5785u64 sob_addr = CFG_BASE +5786mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +5787(i * sob_delta);5788u32 dma_offset = i * DMA_CORE_OFFSET;57895790WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + 
dma_offset,5791lower_32_bits(sob_addr));5792WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,5793upper_32_bits(sob_addr));5794WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);57955796/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be5797* modified by the user for SRAM reduction5798*/5799if (i > 1)5800WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,58010x00000001);5802}5803}58045805static void gaudi_restore_qm_registers(struct hl_device *hdev)5806{5807u32 qman_offset;5808int i;58095810for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {5811qman_offset = i * DMA_QMAN_OFFSET;5812WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);5813}58145815for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {5816qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);5817WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);5818}58195820for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {5821qman_offset = i * TPC_QMAN_OFFSET;5822WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);5823}58245825for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {5826qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +5827(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;5828WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);5829}5830}58315832static int gaudi_restore_user_registers(struct hl_device *hdev)5833{5834int rc;58355836rc = gaudi_restore_sm_registers(hdev);5837if (rc)5838return rc;58395840gaudi_restore_dma_registers(hdev);5841gaudi_restore_qm_registers(hdev);58425843return 0;5844}58455846static int gaudi_context_switch(struct hl_device *hdev, u32 asid)5847{5848return 0;5849}58505851static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)5852{5853u32 size = hdev->asic_prop.mmu_pgt_size +5854hdev->asic_prop.mmu_cache_mng_size;5855struct gaudi_device *gaudi = hdev->asic_specific;5856u64 addr = hdev->asic_prop.mmu_pgt_addr;58575858if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))5859return 0;58605861return gaudi_memset_device_memory(hdev, addr, size, 0);5862}58635864static void gaudi_restore_phase_topology(struct hl_device *hdev)5865{58665867}58685869static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,5870u32 size_to_dma, dma_addr_t dma_addr)5871{5872u32 err_cause, val;5873u64 dma_offset;5874int rc;58755876dma_offset = dma_id * DMA_CORE_OFFSET;58775878WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));5879WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));5880WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));5881WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));5882WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);5883WREG32(mmDMA0_CORE_COMMIT + dma_offset,5884(1 << DMA0_CORE_COMMIT_LIN_SHIFT));58855886rc = hl_poll_timeout(5887hdev,5888mmDMA0_CORE_STS0 + dma_offset,5889val,5890((val & DMA0_CORE_STS0_BUSY_MASK) == 0),58910,58921000000);58935894if (rc) {5895dev_err(hdev->dev,5896"DMA %d timed-out during reading of 0x%llx\n",5897dma_id, addr);5898return -EIO;5899}59005901/* Verify DMA is OK */5902err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);5903if (err_cause) {5904dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);5905dev_dbg(hdev->dev,5906"Clearing DMA0 engine from errors (cause 0x%x)\n",5907err_cause);5908WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);59095910return -EIO;5911}59125913return 0;5914}59155916static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,5917void *blob_addr)5918{5919u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;5920u32 qm_glbl_sts0, qm_cgm_sts;5921u64 dma_offset, 
qm_offset;5922dma_addr_t dma_addr;5923void *kernel_addr;5924bool is_eng_idle;5925int rc = 0, dma_id;59265927kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);59285929if (!kernel_addr)5930return -ENOMEM;59315932hdev->asic_funcs->hw_queues_lock(hdev);59335934dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];5935dma_offset = dma_id * DMA_CORE_OFFSET;5936qm_offset = dma_id * DMA_QMAN_OFFSET;5937dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);5938qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);5939qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);5940is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&5941IS_DMA_IDLE(dma_core_sts0);59425943if (!is_eng_idle) {5944dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];5945dma_offset = dma_id * DMA_CORE_OFFSET;5946qm_offset = dma_id * DMA_QMAN_OFFSET;5947dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);5948qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);5949qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);5950is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&5951IS_DMA_IDLE(dma_core_sts0);59525953if (!is_eng_idle) {5954dev_err_ratelimited(hdev->dev,5955"Can't read via DMA because it is BUSY\n");5956rc = -EAGAIN;5957goto out;5958}5959}59605961cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);5962WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,59630xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);59645965/* TODO: remove this by mapping the DMA temporary buffer to the MMU5966* using the compute ctx ASID, if exists. If not, use the kernel ctx5967* ASID5968*/5969WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));59705971/* Verify DMA is OK */5972err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);5973if (err_cause) {5974dev_dbg(hdev->dev,5975"Clearing DMA0 engine from errors (cause 0x%x)\n",5976err_cause);5977WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);5978}59795980pos = 0;5981size_left = size;5982size_to_dma = SZ_2M;59835984while (size_left > 0) {59855986if (size_left < SZ_2M)5987size_to_dma = size_left;59885989rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,5990dma_addr);5991if (rc)5992break;59935994memcpy(blob_addr + pos, kernel_addr, size_to_dma);59955996if (size_left <= SZ_2M)5997break;59985999pos += SZ_2M;6000addr += SZ_2M;6001size_left -= SZ_2M;6002}60036004/* TODO: remove this by mapping the DMA temporary buffer to the MMU6005* using the compute ctx ASID, if exists. 
If not, use the kernel ctx6006* ASID6007*/6008WREG32_AND(mmDMA0_CORE_PROT + dma_offset,6009~BIT(DMA0_CORE_PROT_VAL_SHIFT));60106011WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);60126013out:6014hdev->asic_funcs->hw_queues_unlock(hdev);60156016hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);60176018return rc;6019}60206021static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)6022{6023struct gaudi_device *gaudi = hdev->asic_specific;60246025if (hdev->reset_info.hard_reset_pending)6026return U64_MAX;60276028return readq(hdev->pcie_bar[HBM_BAR_ID] +6029(addr - gaudi->hbm_bar_cur_addr));6030}60316032static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)6033{6034struct gaudi_device *gaudi = hdev->asic_specific;60356036if (hdev->reset_info.hard_reset_pending)6037return;60386039writeq(val, hdev->pcie_bar[HBM_BAR_ID] +6040(addr - gaudi->hbm_bar_cur_addr));6041}60426043void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)6044{6045/* mask to zero the MMBP and ASID bits */6046WREG32_AND(reg, ~0x7FF);6047WREG32_OR(reg, asid);6048}60496050static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)6051{6052struct gaudi_device *gaudi = hdev->asic_specific;60536054if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))6055return;60566057if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {6058dev_crit(hdev->dev, "asid %u is too big\n", asid);6059return;6060}60616062gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);6063gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);6064gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);6065gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);6066gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);60676068gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);6069gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);6070gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);6071gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);6072gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);60736074gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);6075gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);6076gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);6077gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);6078gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);60796080gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);6081gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);6082gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);6083gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);6084gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);60856086gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);6087gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);6088gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);6089gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);6090gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);60916092gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);6093gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);6094gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);6095gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, 
asid);6096gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);60976098gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);6099gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);6100gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);6101gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);6102gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);61036104gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);6105gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);6106gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);6107gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);6108gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);61096110gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);6111gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);6112gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);6113gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);6114gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);6115gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);6116gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);6117gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);61186119gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);6120gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);6121gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);6122gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);6123gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);6124gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);6125gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);61266127gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);6128gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);6129gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);6130gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);6131gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);6132gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);6133gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);61346135gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);6136gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);6137gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);6138gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);6139gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);6140gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);6141gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);61426143gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);6144gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);6145gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);6146gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);6147gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);6148gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);6149gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);61506151gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);6152gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);6153gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, 
asid);6154gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);6155gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);6156gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);6157gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);61586159gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);6160gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);6161gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);6162gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);6163gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);6164gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);6165gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);61666167gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);6168gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);6169gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);6170gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);6171gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);6172gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);6173gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);61746175gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);6176gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);6177gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);6178gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);6179gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);6180gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);6181gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);61826183gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);6184gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);6185gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);6186gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);6187gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);6188gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);6189gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);6190gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);6191gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);6192gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);61936194gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);6195gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);6196gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);6197gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);6198gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);6199gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);6200gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);6201gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);6202gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);6203gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);6204gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);6205gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);62066207if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {6208gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,6209asid);6210gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,6211asid);6212gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,6213asid);6214gaudi_mmu_prepare_reg(hdev, 
mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,6215asid);6216gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,6217asid);6218}62196220if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {6221gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,6222asid);6223gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,6224asid);6225gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,6226asid);6227gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,6228asid);6229gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,6230asid);6231}62326233if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {6234gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,6235asid);6236gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,6237asid);6238gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,6239asid);6240gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,6241asid);6242gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,6243asid);6244}62456246if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {6247gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,6248asid);6249gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,6250asid);6251gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,6252asid);6253gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,6254asid);6255gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,6256asid);6257}62586259if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {6260gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,6261asid);6262gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,6263asid);6264gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,6265asid);6266gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,6267asid);6268gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,6269asid);6270}62716272if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {6273gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,6274asid);6275gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,6276asid);6277gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,6278asid);6279gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,6280asid);6281gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,6282asid);6283}62846285if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {6286gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,6287asid);6288gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,6289asid);6290gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,6291asid);6292gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,6293asid);6294gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,6295asid);6296}62976298if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {6299gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,6300asid);6301gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,6302asid);6303gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,6304asid);6305gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,6306asid);6307gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,6308asid);6309}63106311if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {6312gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,6313asid);6314gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,6315asid);6316gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,6317asid);6318gaudi_mmu_prepare_reg(hdev, 
mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}

static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}

static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}

static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
							bool is_write, u16 *engine_id_1,
							u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ?
DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :6429DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;64306431switch (x_y) {6432case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6433case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6434dma_id[0] = 0;6435dma_id[1] = 2;6436break;6437case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6438case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6439dma_id[0] = 1;6440dma_id[1] = 3;6441break;6442case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6443case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6444dma_id[0] = 4;6445dma_id[1] = 6;6446break;6447case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6448case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6449dma_id[0] = 5;6450dma_id[1] = 7;6451break;6452default:6453goto unknown_initiator;6454}64556456for (i = 0 ; i < 2 ; i++) {6457dma_offset = dma_id[i] * DMA_CORE_OFFSET;6458err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);6459}64606461switch (x_y) {6462case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6463case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6464if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6465*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;6466return "DMA0";6467} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6468*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;6469return "DMA2";6470} else {6471*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;6472*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;6473return "DMA0 or DMA2";6474}6475case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6476case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6477if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6478*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;6479return "DMA1";6480} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6481*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;6482return "DMA3";6483} else {6484*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;6485*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;6486return "DMA1 or DMA3";6487}6488case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6489case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6490if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6491*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;6492return "DMA4";6493} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6494*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;6495return "DMA6";6496} else {6497*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;6498*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;6499return "DMA4 or DMA6";6500}6501case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6502case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6503if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {6504*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;6505return "DMA5";6506} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {6507*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;6508return "DMA7";6509} else {6510*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;6511*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;6512return "DMA5 or DMA7";6513}6514}65156516unknown_initiator:6517return "unknown initiator";6518}65196520static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,6521u16 *engine_id_1, u16 *engine_id_2)6522{6523u32 val, x_y, axi_id;65246525val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) :6526RREG32(mmMMU_UP_RAZWI_READ_ID);6527x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |6528(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));6529axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<6530RAZWI_INITIATOR_AXI_ID_SHIFT);65316532switch (x_y) {6533case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:6534if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6535*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;6536return "TPC0";6537}6538if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6539*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;6540return "NIC0";6541}6542break;6543case RAZWI_INITIATOR_ID_X_Y_TPC1:6544*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;6545return "TPC1";6546case RAZWI_INITIATOR_ID_X_Y_MME0_0:6547case RAZWI_INITIATOR_ID_X_Y_MME0_1:6548*engine_id_1 = GAUDI_ENGINE_ID_MME_0;6549return "MME0";6550case RAZWI_INITIATOR_ID_X_Y_MME1_0:6551case RAZWI_INITIATOR_ID_X_Y_MME1_1:6552*engine_id_1 = GAUDI_ENGINE_ID_MME_1;6553return "MME1";6554case RAZWI_INITIATOR_ID_X_Y_TPC2:6555*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;6556return "TPC2";6557case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:6558if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6559*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;6560return "TPC3";6561}6562/* PCI, CPU or PSOC does not have engine id*/6563if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))6564return "PCI";6565if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))6566return "CPU";6567if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))6568return "PSOC";6569break;6570case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:6571case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:6572case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:6573case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:6574case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:6575case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:6576case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:6577case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:6578return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,6579engine_id_1, engine_id_2);6580case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:6581if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6582*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;6583return "TPC4";6584}6585if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6586*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;6587return "NIC1";6588}6589if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {6590*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;6591return "NIC2";6592}6593break;6594case RAZWI_INITIATOR_ID_X_Y_TPC5:6595*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;6596return "TPC5";6597case RAZWI_INITIATOR_ID_X_Y_MME2_0:6598case RAZWI_INITIATOR_ID_X_Y_MME2_1:6599*engine_id_1 = GAUDI_ENGINE_ID_MME_2;6600return "MME2";6601case RAZWI_INITIATOR_ID_X_Y_MME3_0:6602case RAZWI_INITIATOR_ID_X_Y_MME3_1:6603*engine_id_1 = GAUDI_ENGINE_ID_MME_3;6604return "MME3";6605case RAZWI_INITIATOR_ID_X_Y_TPC6:6606*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;6607return "TPC6";6608case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:6609if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {6610*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;6611return "TPC7";6612}6613if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {6614*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;6615return "NIC4";6616}6617if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {6618*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;6619return "NIC5";6620}6621break;6622default:6623break;6624}66256626dev_err(hdev->dev,6627"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",6628val,6629(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,6630(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,6631(val >> 
RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}

static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}

static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}

/*
 * +-------------------+------------------------------------------------------+
 * | Configuration Reg |                     Description                      |
 * |      Address      |                                                      |
 * +-------------------+------------------------------------------------------+
 * |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 * |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 * |                   |0xF34 memory wrappers 63:32                           |
 * |                   |0xF38 memory wrappers 95:64                           |
 * |                   |0xF3C memory wrappers 127:96                          |
 * +-------------------+------------------------------------------------------+
 * | 0xF40 - 0xF4F     |ECC double error indication (1 bit per memory wrapper)|
 * |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 * |                   |0xF44 memory wrappers 63:32                           |
 * |                   |0xF48 memory wrappers 95:64                           |
 * |                   |0xF4C memory wrappers 127:96                          |
 * +-------------------+------------------------------------------------------+
 */
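/*
 * Worked example of the lookup below: with num_memories = 90 the
 * indication spans 90 / 32 + 1 = 3 registers; a single bit set at bit 5
 * of the second register (offset +4) yields
 * memory_wrapper_idx = 32 + 5 = 37.
 */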
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
		*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}

/*
 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
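/*
 * Example: with q_len = 8 (mask = 0x7), decrementing idx = 0 yields
 * (0 + 8 - 1) & 0x7 = 7, i.e. the index wraps to the last entry instead
 * of underflowing.
 */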
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

	/*
	 * modular decrement is equivalent to adding (queue_size -1)
	 * later we take LSBs to make sure the value is in the
	 * range [0, queue_len - 1]
	 */
	return (idx + q_len - 1) & mask;
}

/**
 * gaudi_handle_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
			stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
		stream, cq_ptr, size);

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}

/**
 * gaudi_handle_last_pqes_on_err - print last PQEs on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
			stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry - break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
			stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}

/**
 * handle_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 *
 * This function attempts to extract as much data as possible on QMAN error.
 * On upper CP print the SW config stream data and last 8 PQEs.
 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
 */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base, u64 event_mask)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
			qman_base, event_mask, true);
		return;
	}

	/* handle Lower-CP */
	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
			qman_base, event_mask, false);
}
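/*
 * Each QMAN has QMAN_STREAMS upper CPs plus one lower CP, hence the
 * QMAN_STREAMS + 1 iterations over the GLBL_STS1 registers below; index
 * QMAN_STREAMS denotes the lower CP.
 */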
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base,
					u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			memset(&hdev->captured_err_info.undef_opcode, 0,
				sizeof(hdev->captured_err_info.undef_opcode));

			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}

static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
			le32_to_cpu(sei_data->sei_log));
		break;
	}
}
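/*
 * ECC details either arrive ready-made in the event data from the FW
 * (always the case when FW security is enabled) or are extracted
 * directly from the block's ECC capture registers via
 * gaudi_extract_ecc_info(), depending on the event type.
 */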
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}

static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ...
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}

static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool check_razwi, u64 *event_mask)
{
	bool is_read = false, is_write = false;
	u16 engine_id[2], num_of_razwi_eng = 0;
	char desc[64] = "";
	u64 razwi_addr = 0;
	u8 razwi_flags = 0;

	/*
	 * Init the engine ids as invalid by default; they get a valid value
	 * only if the razwi was initiated by an engine that has an engine id.
	 */
	engine_id[0] = HL_RAZWI_NA_ENG_ID;
	engine_id[1] = HL_RAZWI_NA_ENG_ID;

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (check_razwi) {
		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
						&is_write);
		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);

		if (is_read)
			razwi_flags |= HL_RAZWI_READ;
		if (is_write)
			razwi_flags |= HL_RAZWI_WRITE;

		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
				num_of_razwi_eng = 2;
			else
				num_of_razwi_eng = 1;
		}

		if (razwi_flags)
			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
					razwi_flags, event_mask);
	}
}

static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}

static void gaudi_print_fw_alive_info(struct hl_device *hdev,
					struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
		le32_to_cpu(fw_alive->process_id),
		le32_to_cpu(fw_alive->thread_id),
		le64_to_cpu(fw_alive->uptime_seconds));
}

static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
					void *data)
{
	char desc[64] = "", *type;
	struct eq_nic_sei_event *eq_nic_sei = data;
	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;

	switch (eq_nic_sei->axi_error_cause) {
	case RXB:
		type = "RXB";
		break;
	case RXE:
		type = "RXE";
		break;
	case TXS:
		type = "TXS";
		break;
	case TXE:
		type = "TXE";
		break;
	case QPC_RESP:
		type = "QPC_RESP";
		break;
	case NON_AXI_ERR:
		type = "NON_AXI_ERR";
		break;
	case TMR:
		type = "TMR";
		break;
	default:
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			eq_nic_sei->axi_error_cause);
		type = "N/A";
		break;
	}

	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
			eq_nic_sei->id);
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);
}

static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return -EPERM;
}

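/*
 * Read and report HBM ECC/parity interrupt state for one HBM device.
 * When the firmware exposes ECC data (HBM_ECC_EN), the FW-provided record
 * is used; otherwise, if security allows it, the memory-controller
 * registers are read and cleared directly.
 */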
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}

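/*
 * Each TPC owns several consecutive entries in the event-type enum, so the
 * TPC index is recovered from the distance to the TPC0 event: DEC events
 * are spaced two entries apart, KRN_ERR events six entries apart.
 */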
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}

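/*
 * Top-level event-queue handler: dispatches each firmware event to the
 * relevant handler, accumulates a notifier event mask for user-space and
 * decides whether the event must escalate to a device reset.
 */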
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

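	/*
	 * Common exit path for fatal events: with secured FW the reset
	 * request bypasses the FW, otherwise a hard reset is scheduled only
	 * if the driver is configured to hard-reset on FW events.
	 */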
reset_device:
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		if (event_mask)
			hl_notifier_event_send_all(hdev, event_mask);
	}
}

static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}

static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	return 0;
}

static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
			"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}

static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_monitor_dump(hdev, data);
}

/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}

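/*
 * The internal CB pool backs driver-generated command buffers (e.g. for
 * collective wait): a DMA-coherent host allocation is handed to a gen_pool
 * allocator and mapped into the device MMU at a reserved host VA block.
 */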
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}

static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi_restore_user_registers(ctx->hdev);
	if (rc)
		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}

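/*
 * Signal/wait CBs are built from sync-manager MSG_SHORT packets: a signal CB
 * atomically increments a sync object (SOB), while a wait CB programs a
 * monitor to fire when the SOB group reaches the target value.
 */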
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

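/*
 * A wait CB is the concatenation of the three monitor setup packets, the
 * monitor arm packet and a fence packet on which the queue blocks until the
 * monitor's payload write lands in the queue's CP_FENCE2 counter.
 */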
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* The register value is a partial address of the sync object and is
	 * used as a unique identifier. For this, the CFG base bits must be
	 * cleared from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

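/*
 * Build the state-dump mapping from sync-object IDs to the TPC/MME/DMA
 * engines that signal them, by sampling each engine's sync-object register.
 */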
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

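/*
 * Dump the state of all fence counters of one engine: for every stream with
 * a fence in progress, print the fence counter/rdata registers and values.
 */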
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

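/*
 * Register the Gaudi-specific tables used by the common state-dump code:
 * sync-object and monitor name hashes, the specs properties and callbacks.
 */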
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

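/* ASIC function table, wired into the common habanalabs core at probe */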
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}