Path: blob/master/drivers/accel/habanalabs/gaudi/gaudiP.h
26439 views
/* SPDX-License-Identifier: GPL-2.01*2* Copyright 2019-2022 HabanaLabs, Ltd.3* All Rights Reserved.4*5*/67#ifndef GAUDIP_H_8#define GAUDIP_H_910#include <uapi/drm/habanalabs_accel.h>11#include "../common/habanalabs.h"12#include <linux/habanalabs/hl_boot_if.h>13#include "../include/gaudi/gaudi_packets.h"14#include "../include/gaudi/gaudi.h"15#include "../include/gaudi/gaudi_async_events.h"16#include "../include/gaudi/gaudi_fw_if.h"1718#define NUMBER_OF_EXT_HW_QUEUES 819#define NUMBER_OF_CMPLT_QUEUES NUMBER_OF_EXT_HW_QUEUES20#define NUMBER_OF_CPU_HW_QUEUES 121#define NUMBER_OF_INT_HW_QUEUES 10022#define NUMBER_OF_HW_QUEUES (NUMBER_OF_EXT_HW_QUEUES + \23NUMBER_OF_CPU_HW_QUEUES + \24NUMBER_OF_INT_HW_QUEUES)2526/* 10 NIC QMANs, DMA5 QMAN, TPC7 QMAN */27#define NUMBER_OF_COLLECTIVE_QUEUES 1228#define NUMBER_OF_SOBS_IN_GRP 112930#define GAUDI_STREAM_MASTER_ARR_SIZE 83132#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */3334#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */3536#define MAX_POWER_DEFAULT_PCI 200000 /* 200W */37#define MAX_POWER_DEFAULT_PMC 350000 /* 350W */3839#define DC_POWER_DEFAULT_PCI 60000 /* 60W */40#define DC_POWER_DEFAULT_PMC 60000 /* 60W */4142#define DC_POWER_DEFAULT_PMC_SEC 97000 /* 97W */4344#define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */4546#define TPC_ENABLED_MASK 0xFF4748#define GAUDI_HBM_SIZE_32GB 0x800000000ull49#define GAUDI_HBM_DEVICES 450#define GAUDI_HBM_CHANNELS 851#define GAUDI_HBM_CFG_BASE (mmHBM0_BASE - CFG_BASE)52#define GAUDI_HBM_CFG_OFFSET (mmHBM1_BASE - mmHBM0_BASE)5354#define DMA_MAX_TRANSFER_SIZE U32_MAX5556#define GAUDI_DEFAULT_CARD_NAME "HL205"5758#define GAUDI_MAX_PENDING_CS SZ_16K5960#if !IS_MAX_PENDING_CS_VALID(GAUDI_MAX_PENDING_CS)61#error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"62#endif6364#define PCI_DMA_NUMBER_OF_CHNLS 265#define HBM_DMA_NUMBER_OF_CHNLS 666#define DMA_NUMBER_OF_CHNLS (PCI_DMA_NUMBER_OF_CHNLS + \67HBM_DMA_NUMBER_OF_CHNLS)6869#define MME_NUMBER_OF_SLAVE_ENGINES 270#define MME_NUMBER_OF_ENGINES (MME_NUMBER_OF_MASTER_ENGINES + \71MME_NUMBER_OF_SLAVE_ENGINES)72#define MME_NUMBER_OF_QMANS (MME_NUMBER_OF_MASTER_ENGINES * \73QMAN_STREAMS)7475#define QMAN_STREAMS 476#define PQ_FETCHER_CACHE_SIZE 87778#define DMA_QMAN_OFFSET (mmDMA1_QM_BASE - mmDMA0_QM_BASE)79#define TPC_QMAN_OFFSET (mmTPC1_QM_BASE - mmTPC0_QM_BASE)80#define MME_QMAN_OFFSET (mmMME1_QM_BASE - mmMME0_QM_BASE)81#define NIC_MACRO_QMAN_OFFSET (mmNIC1_QM0_BASE - mmNIC0_QM0_BASE)82#define NIC_ENGINE_QMAN_OFFSET (mmNIC0_QM1_BASE - mmNIC0_QM0_BASE)8384#define TPC_CFG_OFFSET (mmTPC1_CFG_BASE - mmTPC0_CFG_BASE)8586#define DMA_CORE_OFFSET (mmDMA1_CORE_BASE - mmDMA0_CORE_BASE)8788#define QMAN_LDMA_SRC_OFFSET (mmDMA0_CORE_SRC_BASE_LO - mmDMA0_CORE_CFG_0)89#define QMAN_LDMA_DST_OFFSET (mmDMA0_CORE_DST_BASE_LO - mmDMA0_CORE_CFG_0)90#define QMAN_LDMA_SIZE_OFFSET (mmDMA0_CORE_DST_TSIZE_0 - mmDMA0_CORE_CFG_0)9192#define QMAN_CPDMA_SRC_OFFSET (mmDMA0_QM_CQ_PTR_LO_4 - mmDMA0_CORE_CFG_0)93#define QMAN_CPDMA_DST_OFFSET (mmDMA0_CORE_DST_BASE_LO - mmDMA0_CORE_CFG_0)94#define QMAN_CPDMA_SIZE_OFFSET (mmDMA0_QM_CQ_TSIZE_4 - mmDMA0_CORE_CFG_0)9596#define SIF_RTR_CTRL_OFFSET (mmSIF_RTR_CTRL_1_BASE - mmSIF_RTR_CTRL_0_BASE)9798#define NIF_RTR_CTRL_OFFSET (mmNIF_RTR_CTRL_1_BASE - mmNIF_RTR_CTRL_0_BASE)99100#define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE)101#define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)102103#define NUM_OF_SOB_IN_BLOCK \104(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \105mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)106107#define NUM_OF_MONITORS_IN_BLOCK \108(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \109mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)110111#define MONITOR_MAX_SOBS 8112113/* DRAM Memory Map */114115#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */116#define MMU_PAGE_TABLES_SIZE 0x0BF00000 /* 191MB */117#define MMU_CACHE_MNG_SIZE 0x00100000 /* 1MB */118#define RESERVED 0x04000000 /* 64MB */119120#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE121#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)122#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)123124#define DRAM_DRIVER_END_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE +\125RESERVED)126127#define DRAM_BASE_ADDR_USER 0x20000000128129#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER)130#error "Driver must reserve no more than 512MB"131#endif132133/* Internal QMANs PQ sizes */134135#define MME_QMAN_LENGTH 1024136#define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)137138#define HBM_DMA_QMAN_LENGTH 4096139#define HBM_DMA_QMAN_SIZE_IN_BYTES \140(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)141142#define TPC_QMAN_LENGTH 1024143#define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)144145#define NIC_QMAN_LENGTH 4096146#define NIC_QMAN_SIZE_IN_BYTES (NIC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)147148149#define SRAM_USER_BASE_OFFSET GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START150151/* Virtual address space */152#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */153#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 512GB */154#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \155VA_HOST_SPACE_START) /* 767TB */156#define HOST_SPACE_INTERNAL_CB_SZ SZ_2M157158#define HW_CAP_PLL BIT(0)159#define HW_CAP_HBM BIT(1)160#define HW_CAP_MMU BIT(2)161#define HW_CAP_MME BIT(3)162#define HW_CAP_CPU BIT(4)163#define HW_CAP_PCI_DMA BIT(5)164#define HW_CAP_MSI BIT(6)165#define HW_CAP_CPU_Q BIT(7)166#define HW_CAP_HBM_DMA BIT(8)167#define HW_CAP_SRAM_SCRAMBLER BIT(10)168#define HW_CAP_HBM_SCRAMBLER BIT(11)169170#define HW_CAP_NIC0 BIT(14)171#define HW_CAP_NIC1 BIT(15)172#define HW_CAP_NIC2 BIT(16)173#define HW_CAP_NIC3 BIT(17)174#define HW_CAP_NIC4 BIT(18)175#define HW_CAP_NIC5 BIT(19)176#define HW_CAP_NIC6 BIT(20)177#define HW_CAP_NIC7 BIT(21)178#define HW_CAP_NIC8 BIT(22)179#define HW_CAP_NIC9 BIT(23)180#define HW_CAP_NIC_MASK GENMASK(23, 14)181#define HW_CAP_NIC_SHIFT 14182183#define HW_CAP_TPC0 BIT(24)184#define HW_CAP_TPC1 BIT(25)185#define HW_CAP_TPC2 BIT(26)186#define HW_CAP_TPC3 BIT(27)187#define HW_CAP_TPC4 BIT(28)188#define HW_CAP_TPC5 BIT(29)189#define HW_CAP_TPC6 BIT(30)190#define HW_CAP_TPC7 BIT(31)191#define HW_CAP_TPC_MASK GENMASK(31, 24)192#define HW_CAP_TPC_SHIFT 24193194#define NEXT_SYNC_OBJ_ADDR_INTERVAL \195(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \196mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)197#define NUM_OF_MME_ENGINES 2198#define NUM_OF_MME_SUB_ENGINES 2199#define NUM_OF_TPC_ENGINES 8200#define NUM_OF_DMA_ENGINES 8201#define NUM_OF_QUEUES 5202#define NUM_OF_STREAMS 4203#define NUM_OF_FENCES 4204205206#define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39)207#define GAUDI_PCI_TO_CPU_ADDR(addr) \208do { \209(addr) &= ~GENMASK_ULL(49, 39); \210(addr) |= BIT_ULL(39); \211} while (0)212#define GAUDI_CPU_TO_PCI_ADDR(addr, extension) \213do { \214(addr) &= ~GENMASK_ULL(49, 39); \215(addr) |= (u64) (extension) << 39; \216} while (0)217218enum gaudi_dma_channels {219GAUDI_PCI_DMA_1,220GAUDI_PCI_DMA_2,221GAUDI_HBM_DMA_1,222GAUDI_HBM_DMA_2,223GAUDI_HBM_DMA_3,224GAUDI_HBM_DMA_4,225GAUDI_HBM_DMA_5,226GAUDI_HBM_DMA_6,227GAUDI_DMA_MAX228};229230enum gaudi_tpc_mask {231GAUDI_TPC_MASK_TPC0 = 0x01,232GAUDI_TPC_MASK_TPC1 = 0x02,233GAUDI_TPC_MASK_TPC2 = 0x04,234GAUDI_TPC_MASK_TPC3 = 0x08,235GAUDI_TPC_MASK_TPC4 = 0x10,236GAUDI_TPC_MASK_TPC5 = 0x20,237GAUDI_TPC_MASK_TPC6 = 0x40,238GAUDI_TPC_MASK_TPC7 = 0x80,239GAUDI_TPC_MASK_ALL = 0xFF240};241242enum gaudi_nic_mask {243GAUDI_NIC_MASK_NIC0 = 0x01,244GAUDI_NIC_MASK_NIC1 = 0x02,245GAUDI_NIC_MASK_NIC2 = 0x04,246GAUDI_NIC_MASK_NIC3 = 0x08,247GAUDI_NIC_MASK_NIC4 = 0x10,248GAUDI_NIC_MASK_NIC5 = 0x20,249GAUDI_NIC_MASK_NIC6 = 0x40,250GAUDI_NIC_MASK_NIC7 = 0x80,251GAUDI_NIC_MASK_NIC8 = 0x100,252GAUDI_NIC_MASK_NIC9 = 0x200,253GAUDI_NIC_MASK_ALL = 0x3FF254};255256/*257* struct gaudi_hw_sob_group - H/W SOB group info.258* @hdev: habanalabs device structure.259* @kref: refcount of this SOB group. group will reset once refcount is zero.260* @base_sob_id: base sob id of this SOB group.261* @queue_id: id of the queue that waits on this sob group262*/263struct gaudi_hw_sob_group {264struct hl_device *hdev;265struct kref kref;266u32 base_sob_id;267u32 queue_id;268};269270#define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)271/**272* struct gaudi_collective_properties -273* holds all SOB groups and queues info reserved for the collective274* @hw_sob_group: H/W SOB groups.275* @next_sob_group_val: the next value to use for the currently used SOB group.276* @curr_sob_group_idx: the index of the currently used SOB group.277* @mstr_sob_mask: pre-defined masks for collective master monitors278*/279struct gaudi_collective_properties {280struct gaudi_hw_sob_group hw_sob_group[NUM_SOB_GROUPS];281u16 next_sob_group_val[QMAN_STREAMS];282u8 curr_sob_group_idx[QMAN_STREAMS];283u8 mstr_sob_mask[HL_COLLECTIVE_RSVD_MSTR_MONS];284};285286/**287* struct gaudi_internal_qman_info - Internal QMAN information.288* @pq_kernel_addr: Kernel address of the PQ memory area in the host.289* @pq_dma_addr: DMA address of the PQ memory area in the host.290* @pq_size: Size of allocated host memory for PQ.291*/292struct gaudi_internal_qman_info {293void *pq_kernel_addr;294dma_addr_t pq_dma_addr;295size_t pq_size;296};297298/**299* struct gaudi_device - ASIC specific manage structure.300* @cpucp_info_get: get information on device from CPU-CP301* @hw_queues_lock: protects the H/W queues from concurrent access.302* @internal_qmans: Internal QMANs information. The array size is larger than303* the actual number of internal queues because they are not in304* consecutive order.305* @hbm_bar_cur_addr: current address of HBM PCI bar.306* @events: array that holds all event id's307* @events_stat: array that holds histogram of all received events.308* @events_stat_aggregate: same as events_stat but doesn't get cleared on reset309* @hw_cap_initialized: This field contains a bit per H/W engine. When that310* engine is initialized, that bit is set by the driver to311* signal we can use this engine in later code paths.312* Each bit is cleared upon reset of its corresponding H/W313* engine.314* @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an315* 8-bit value so use u8.316*/317struct gaudi_device {318int (*cpucp_info_get)(struct hl_device *hdev);319320/* TODO: remove hw_queues_lock after moving to scheduler code */321spinlock_t hw_queues_lock;322323struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE];324325struct gaudi_collective_properties collective_props;326327u64 hbm_bar_cur_addr;328329u32 events[GAUDI_EVENT_SIZE];330u32 events_stat[GAUDI_EVENT_SIZE];331u32 events_stat_aggregate[GAUDI_EVENT_SIZE];332u32 hw_cap_initialized;333u8 mmu_cache_inv_pi;334};335336void gaudi_init_security(struct hl_device *hdev);337void gaudi_ack_protection_bits_errors(struct hl_device *hdev);338int gaudi_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);339void gaudi_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);340void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);341342#endif /* GAUDIP_H_ */343344345