GitHub Repository: torvalds/linux
Path: blob/master/drivers/accel/habanalabs/common/mmu/mmu_v2.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"
#include "../../include/hw_ip/mmu/mmu_v2_0.h"

#include <linux/slab.h>

/**
 * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize the shadow hash that holds all page table hops related to this
 * context.
 *
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);

	return 0;
}

/*
 * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 */
static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
	}
}

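/**
 * hl_mmu_v2_unmap() - unmap a device virtual address from its physical page.
 * @ctx: pointer to the context structure.
 * @virt_addr: device virtual address to unmap.
 * @is_dram_addr: true if the address resides in the DRAM (HMMU) range.
 *
 * Walk the hop tables of the scrambled virtual address, clear the matching
 * PTEs and release hop tables that are no longer in use. MMU v2 handles only
 * DRAM (huge page) mappings here.
 *
 * Return: 0 on success, -EINVAL if the address is not a DRAM address or is
 * not mapped, -EFAULT if the page-table walk fails.
 */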
static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
		scrambled_virt_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge = false;
	int i, hop_last;

	/* device resident in V2 are allowed only for HMMU */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;

	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
							hop_addr[0], scrambled_virt_addr);
	if (hop_pte_addr[0] == U64_MAX)
		return -EFAULT;

	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];

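	/*
	 * Walk hops 1..num_hops-1: follow each PTE to the next hop table and
	 * stop early once a PTE with the last-hop (huge page) bit set is found.
	 */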
	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[i] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
							hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX)
			return -EFAULT;

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];

		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
			hop_last = i;
			is_huge = true;
			break;
		}
	}

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!(curr_pte & PAGE_PRESENT_MASK))
		goto not_mapped;

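	/*
	 * Clear the PTEs from the last hop back toward hop0, dropping the
	 * reference that each cleared PTE holds on its hop table.
	 */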
	for (i = hop_last ; i > 0 ; i--) {
		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
			goto mapped;
	}
	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

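/**
 * hl_mmu_v2_map() - map a device virtual address to a physical page.
 * @ctx: pointer to the context structure.
 * @virt_addr: device virtual address to map.
 * @phys_addr: physical address to map to.
 * @page_size: size of the page being mapped.
 * @is_dram_addr: true if the address resides in the DRAM (HMMU) range.
 *
 * Allocate any missing hop tables for the scrambled virtual address, write
 * the last-hop PTE and link newly allocated hops into their parent tables.
 * On failure, hops allocated by this call are freed.
 *
 * Return: 0 on success, negative error code otherwise.
 */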
static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
		curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	bool hop_new[MMU_ARCH_6_HOPS] = { false };
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	int rc, i, hop_last;

	/* device resident in V2 are allowed only for HMMU */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;

	hop_last = mmu_prop->num_hops - 1;

	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);

	/* First hop is preallocated therefore it is treated differently */
	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
							hop_addr[0], scrambled_virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];

	/* Handle hop1 to hop_last */
	for (i = 1 ; i <= hop_last ; i++) {
		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
		if (hop_addr[i] == ULLONG_MAX) {
			rc = -ENOMEM;
			goto err;
		}

		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
							hop_addr[i], scrambled_virt_addr);
		if (hop_pte_addr[i] == U64_MAX) {
			rc = -EINVAL;
			goto err;
		}

		if (!hop_pte_addr[i]) {
			rc = -EINVAL;
			goto err;
		}

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
	}

	if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		for (i = 0 ; i <= hop_last ; i++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
				hop_pte_addr[i]);

		rc = -EINVAL;
		goto err;
	}

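	/* Build the last-hop PTE: physical address, last-hop flag and present bit */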
	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
			| mmu_prop->last_mask | PAGE_PRESENT_MASK;

	/* Write the PTEs */
	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);

	/* for each new hop, add its address to the table of previous-hop */
	for (i = 1 ; i <= hop_last ; i++) {
		if (hop_new[i]) {
			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);

			if (i - 1)
				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
		}
	}
	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);

	return 0;

err:
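	/* Roll back: free any hop table that was newly allocated by this call */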
	for (i = 1 ; i <= hop_last ; i++)
		if (hop_new[i] && (hop_addr[i] != U64_MAX))
			hl_mmu_dr_free_hop(ctx, hop_addr[i]);

	return rc;
}

/*
 * hl_mmu_v2_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v2_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
{

}

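/**
 * hl_mmu_v2_get_tlb_info() - walk the hop tables of a device virtual address.
 * @ctx: pointer to the context structure.
 * @virt_addr: device virtual address to translate.
 * @hops: pointer to the hop info structure that is filled with the hop
 *        addresses, PTE addresses and PTE values of the walk.
 *
 * Return: 0 on success, -EINVAL if the address is outside the DRAM range,
 * -EFAULT if the address is not mapped or the walk fails.
 */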
static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
{
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	struct hl_device *hdev = ctx->hdev;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr;
	int i;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);

	/* device resident in V2 are allowed only for HMMU */
	if (!is_dram_addr)
		return -EINVAL;

	mmu_prop = &prop->dmmu;
	hops->range_type = HL_VA_RANGE_TYPE_DRAM;

	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);

	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
							hops->hop_info[0].hop_addr,
							hops->scrambled_vaddr);
	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
		return -EFAULT;

	hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
							hops->hop_info[0].hop_pte_addr);
	if (hops->hop_info[0].hop_pte_val == U64_MAX)
		return -EFAULT;

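	/*
	 * Walk hops 1..num_hops-1, recording each hop table address, PTE
	 * address and PTE value, and stop at the last-hop PTE.
	 */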
	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						hops->scrambled_vaddr);
		if (hops->hop_info[i].hop_pte_addr == U64_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if passed over all hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	if (hops->scrambled_vaddr != virt_addr)
		hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
				(hdev, hops->hop_info[i].hop_pte_val);
	else
		hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v2_set_funcs - set the MMU function pointers for working with MMU v2
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_dr_init;
	mmu->fini = hl_mmu_dr_fini;
	mmu->ctx_init = hl_mmu_v2_ctx_init;
	mmu->ctx_fini = hl_mmu_v2_ctx_fini;
	mmu->map = hl_mmu_v2_map;
	mmu->unmap = hl_mmu_v2_unmap;
	mmu->flush = hl_mmu_dr_flush;
	mmu->swap_out = hl_mmu_v2_swap_out;
	mmu->swap_in = hl_mmu_v2_swap_in;
	mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
}