Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/amdxdna/npu4_regs.c
52013 views
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
 */
5
6
#include <drm/amdxdna_accel.h>
7
#include <drm/drm_device.h>
8
#include <drm/gpu_scheduler.h>
9
#include <linux/bits.h>
10
#include <linux/sizes.h>
11
12
#include "aie2_pci.h"
13
#include "amdxdna_mailbox.h"
14
#include "amdxdna_pci_drv.h"
15
16
/*
 * NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h)
 *
 * The sixteen MPNPU_PUB_SCRATCH registers are consecutive 32-bit slots
 * (4-byte stride from SCRATCH0 to SCRATCH15).
 * NOTE(review): these look like device addresses that DEFINE_BAR_OFFSET()
 * rebases against the matching *_BAR_BASE - confirm against the macro.
 */
#define MPNPU_PWAITMODE		0x301003C
#define MPNPU_PUB_SEC_INTR	0x3010060
#define MPNPU_PUB_PWRMGMT_INTR	0x3010064
#define MPNPU_PUB_SCRATCH0	0x301006C
#define MPNPU_PUB_SCRATCH1	0x3010070
#define MPNPU_PUB_SCRATCH2	0x3010074
#define MPNPU_PUB_SCRATCH3	0x3010078
#define MPNPU_PUB_SCRATCH4	0x301007C
#define MPNPU_PUB_SCRATCH5	0x3010080
#define MPNPU_PUB_SCRATCH6	0x3010084
#define MPNPU_PUB_SCRATCH7	0x3010088
#define MPNPU_PUB_SCRATCH8	0x301008C
#define MPNPU_PUB_SCRATCH9	0x3010090
#define MPNPU_PUB_SCRATCH10	0x3010094
#define MPNPU_PUB_SCRATCH11	0x3010098
#define MPNPU_PUB_SCRATCH12	0x301009C
#define MPNPU_PUB_SCRATCH13	0x30100A0
#define MPNPU_PUB_SCRATCH14	0x30100A4
#define MPNPU_PUB_SCRATCH15	0x30100A8
/* MP0 message registers, referenced by the PSP register offsets in this file */
#define MP0_C2PMSG_73		0x3810A24
#define MP0_C2PMSG_123		0x3810AEC
38
39
/* MP1 message registers, referenced by the SMU register offsets in this file */
#define MP1_C2PMSG_0		0x3B10900
#define MP1_C2PMSG_60		0x3B109F0
#define MP1_C2PMSG_61		0x3B109F4
42
43
/*
 * SRAM mailbox slots. The X2I slots listed here are spaced 0x2000 apart
 * (slot 15 = slot 0 + 15 * 0x2000, slot 31 = slot 0 + 31 * 0x2000), and
 * I2X slot 31 sits 0x1000 above X2I slot 31.
 */
#define MPNPU_SRAM_X2I_MAILBOX_0	0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15	0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31	0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31	0x363F000
47
48
/* Aperture base addresses; the NPU4_*_BAR_BASE macros alias these */
#define MMNPU_APERTURE0_BASE	0x3000000
#define MMNPU_APERTURE1_BASE	0x3600000
#define MMNPU_APERTURE3_BASE	0x3810000
#define MMNPU_APERTURE4_BASE	0x3B10000
52
53
/*
 * PCIe BAR Index for NPU4
 *
 * The register block and the mailbox share BAR 0.
 */
#define NPU4_REG_BAR_INDEX	0
#define NPU4_MBOX_BAR_INDEX	0
#define NPU4_PSP_BAR_INDEX	4
#define NPU4_SMU_BAR_INDEX	5
#define NPU4_SRAM_BAR_INDEX	2
/*
 * Associated BARs and Apertures
 *
 * Each *_BAR_BASE names the aperture base that pairs with the *_BAR_INDEX
 * of the same prefix; REG and MBOX both use aperture 0.
 */
#define NPU4_REG_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_MBOX_BAR_BASE	MMNPU_APERTURE0_BASE
#define NPU4_PSP_BAR_BASE	MMNPU_APERTURE3_BASE
#define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
#define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
65
66
const struct rt_config npu4_default_rt_cfg[] = {
67
{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
68
{ 10, 1, AIE2_RT_CFG_INIT }, /* DEBUG BUF */
69
{ 14, 0, AIE2_RT_CFG_INIT, BIT_U64(AIE2_PREEMPT) }, /* Frame boundary preemption */
70
{ 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
71
{ 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
72
{ 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
73
{ 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
74
{ 13, 0, AIE2_RT_CFG_FORCE_PREEMPT },
75
{ 14, 0, AIE2_RT_CFG_FRAME_BOUNDARY_PREEMPT },
76
{ 0 },
77
};
78
79
/*
 * DPM clock table for NPU4: one pair of frequencies per entry, terminated
 * by an all-zero entry.
 *
 * NOTE(review): the pair is presumably { NPU clock, H clock } in MHz -
 * confirm against struct dpm_clk_freq.
 * NOTE(review): the fourth and fifth entries are identical
 * ({975, 1267}); confirm this duplication is intentional.
 */
const struct dpm_clk_freq npu4_dpm_clk_table[] = {
	{396, 792},
	{600, 1056},
	{792, 1152},
	{975, 1267},
	{975, 1267},
	{1056, 1408},
	{1152, 1584},
	{1267, 1800},
	{ 0 }
};
90
91
const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
92
{ .major = 6, .min_minor = 12 },
93
{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
94
{ .features = BIT_U64(AIE2_PREEMPT), .major = 6, .min_minor = 12 },
95
{ .features = BIT_U64(AIE2_TEMPORAL_ONLY), .major = 6, .min_minor = 12 },
96
{ .features = GENMASK_ULL(AIE2_TEMPORAL_ONLY, AIE2_NPU_COMMAND), .major = 7 },
97
{ 0 }
98
};
99
100
static const struct amdxdna_dev_priv npu4_dev_priv = {
101
.fw_path = "amdnpu/17f0_10/npu.sbin",
102
.rt_config = npu4_default_rt_cfg,
103
.dpm_clk_tbl = npu4_dpm_clk_table,
104
.fw_feature_tbl = npu4_fw_feature_table,
105
.col_align = COL_ALIGN_NATURE,
106
.mbox_dev_addr = NPU4_MBOX_BAR_BASE,
107
.mbox_size = 0, /* Use BAR size */
108
.sram_dev_addr = NPU4_SRAM_BAR_BASE,
109
.hwctx_limit = 16,
110
.sram_offs = {
111
DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
112
DEFINE_BAR_OFFSET(FW_ALIVE_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
113
},
114
.psp_regs_off = {
115
DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU4_PSP, MP0_C2PMSG_123),
116
DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
117
DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU4_REG, MPNPU_PUB_SCRATCH4),
118
DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU4_REG, MPNPU_PUB_SCRATCH9),
119
DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
120
DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
121
DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
122
DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE),
123
},
124
.smu_regs_off = {
125
DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
126
DEFINE_BAR_OFFSET(SMU_ARG_REG, NPU4_SMU, MP1_C2PMSG_60),
127
DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
128
DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
129
DEFINE_BAR_OFFSET(SMU_OUT_REG, NPU4_SMU, MP1_C2PMSG_60),
130
},
131
.hw_ops = {
132
.set_dpm = npu4_set_dpm,
133
},
134
};
135
136
const struct amdxdna_dev_info dev_npu4_info = {
137
.reg_bar = NPU4_REG_BAR_INDEX,
138
.mbox_bar = NPU4_MBOX_BAR_INDEX,
139
.sram_bar = NPU4_SRAM_BAR_INDEX,
140
.psp_bar = NPU4_PSP_BAR_INDEX,
141
.smu_bar = NPU4_SMU_BAR_INDEX,
142
.first_col = 0,
143
.dev_mem_buf_shift = 15, /* 32 KiB aligned */
144
.dev_mem_base = AIE2_DEVM_BASE,
145
.dev_mem_size = AIE2_DEVM_SIZE,
146
.vbnv = "RyzenAI-npu4",
147
.device_type = AMDXDNA_DEV_TYPE_KMQ,
148
.dev_priv = &npu4_dev_priv,
149
.ops = &aie2_ops, /* NPU4 can share NPU1's callback */
150
};
151
152