GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/at.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <[email protected]>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED	(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

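/*
 * The input address size is 64 - TxSZ: for example, TxSZ == 16 gives a
 * 48-bit IA range, while TxSZ == 25 gives 39 bits (a 3-level walk with
 * 4k pages).
 */
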
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	if (wi->pa52bit)
		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
	switch (BIT(wi->pgshift)) {
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
			return false;
		return ((wi->regime == TR_EL2 ?
			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
	case SZ_16K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
			return false;
		break;
	case SZ_4K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
			return false;
		break;
	}

	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}

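/*
 * Extract the output address from a descriptor. With 52-bit PAs, the
 * upper OA bits are not contiguous: 64k pages (LPA) carry OA[51:48] in
 * the KVM_PTE_ADDR_51_48 field, while 4k/16k pages (LPA2) carry
 * OA[51:50] in KVM_PTE_ADDR_51_50_LPA2.
 */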
static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
	u64 addr;

	if (!wi->pa52bit)
		return desc & GENMASK_ULL(47, wi->pgshift);

	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		addr = desc & GENMASK_ULL(49, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
		break;
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		addr = desc & GENMASK_ULL(47, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
		break;
	}

	return addr;
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (regime == TR_EL10) {
		if (vcpu_has_nv(vcpu) &&
		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
			return 0;

		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
	}

	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	val = effective_tcr2(vcpu, wi->regime);

	/* Abuse TCR2_EL1_* for EL2 */
	wi->poe = val & TCR2_EL1_POE;
	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}

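/*
 * Decode SCTLR/TCR/TTBR for the translation regime and sanity-check the
 * walk parameters (granule, TxSZ, VA range, EPD, output size of the
 * table base), filling in @wi. On error, @wr is primed with the fault
 * information that compute_par_s1() will report.
 */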
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	va55 = va & BIT(55);

	if (vcpu_has_nv(vcpu)) {
		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
	} else {
		WARN_ON_ONCE(wi->regime != TR_EL10);
		wi->s2 = false;
		hcr = 0;
	}

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55 && wi->regime != TR_EL2) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);

	ia_bits = get_ia_size(wi);

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
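	/*
	 * For example, a 4k granule (pgshift == 12, stride == 9) with a
	 * 48-bit IA yields sl == 0, i.e. a four-level walk starting at
	 * level 0.
	 */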

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	wi->sh = (wi->regime == TR_EL2 ?
		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
		  (va55 ?
		   FIELD_GET(TCR_SH1_MASK, tcr) :
		   FIELD_GET(TCR_SH0_MASK, tcr)));

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		lva = wi->pa52bit;
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault;

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault;

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;
	if (wi->pa52bit) {
		/*
		 * Force the alignment on 64 bytes for top-level tables
		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
		 * store bits [51:48] of the first level of lookup.
		 */
		x = max(x, 6);

		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
	}

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
	wi->ha &= (wi->regime == TR_EL2 ?
		   FIELD_GET(TCR_EL2_HA, tcr) :
		   FIELD_GET(TCR_HA, tcr));

	return 0;

addrsz:
	/*
	 * Address Size Fault level 0 to indicate it comes from TTBR.
	 * yes, this is an oddity.
	 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault:
	/* Translation Fault on start level */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
	return -EFAULT;
}

static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
			    struct s1_walk_info *wi)
{
	u64 val;
	int r;

	r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
	if (r)
		return r;

	if (wi->be)
		*desc = be64_to_cpu((__force __be64)val);
	else
		*desc = le64_to_cpu((__force __le64)val);

	return 0;
}

static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
			    struct s1_walk_info *wi)
{
	if (wi->be) {
		old = (__force u64)cpu_to_be64(old);
		new = (__force u64)cpu_to_be64(new);
	} else {
		old = (__force u64)cpu_to_le64(old);
		new = (__force u64)cpu_to_le64(new);
	}

	return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
}

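/*
 * Walk the guest's stage-1 tables for @va: each table address is
 * optionally translated through stage-2, an optional filter callback is
 * given a chance to interrupt the walk, and the Access flag is set via
 * an atomic swap when hardware AF updates are enabled for the regime.
 * On success, @wr describes the leaf mapping; on failure, it carries
 * the fault information used to build PAR_EL1.
 */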
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
	struct kvm_s2_trans s2_trans = {};
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
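		/*
		 * e.g. at level 0 with 4k pages and a 48-bit IA,
		 * va_bottom == 39 and index is VA[47:39] scaled by 8,
		 * the byte offset of the level-0 entry.
		 */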

		ipa = baddr | index;

		if (wi->s2) {
			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		if (wi->filter) {
			ret = wi->filter->fn(&(struct s1_walk_context)
					     {
						     .wi = wi,
						     .table_ipa = baddr,
						     .level = level,
					     }, wi->filter->priv);
			if (ret)
				return ret;
		}

		ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		new_desc = desc;

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc_to_oa(wi, desc);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2 || (wi->pa52bit && level == 1);
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	baddr = desc_to_oa(wi, desc);
	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
		goto addrsz;

	if (wi->ha)
		new_desc |= PTE_AF;

	if (new_desc != desc) {
		if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) {
			fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true);
			return -EPERM;
		}

		ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
		if (ret)
			return ret;

		desc = new_desc;
	}

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = baddr & GENMASK(52, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
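/*
 * Example: MEMATTR(Wb, Wb) == 0xcc, i.e. Normal Write-Back for both the
 * inner and outer domains. A zero outer nibble denotes Device memory,
 * matching the MAIR_ELx attribute encoding.
 */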

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_final_sh(u8 attr, u8 sh)
{
	/* Any form of Device, as well as NC, has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
			u8 attr)
{
	u8 sh;

	/*
	 * non-52bit and LPA have their basic shareability described in the
	 * descriptor. LPA2 gets it from the corresponding field in TCR,
	 * conveniently recorded in the walk info.
	 */
	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
	else
		sh = wi->sh;

	return compute_final_sh(attr, sh);
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_final_sh(final_attr, s2_sh)));

	return par;
}

static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			  struct s1_walk_result *wr)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & SYS_PAR_EL1_PA;

		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (wi->regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (wi->regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & SYS_PAR_EL1_PA;

		sh = compute_s1_sh(wi, wr, mair);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip) \
	do { \
		/* R_LLZDZ */ \
		switch ((ip)) { \
		case 0b0000: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b0001: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b0010: \
			set_ ## w ## _perms((wr), false, false, true ); \
			break; \
		case 0b0011: \
			set_ ## w ## _perms((wr), true , false, true ); \
			break; \
		case 0b0100: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b0101: \
			set_ ## w ## _perms((wr), true , true , false); \
			break; \
		case 0b0110: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b0111: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b1000: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b1001: \
			set_ ## w ## _perms((wr), true , false, false); \
			break; \
		case 0b1010: \
			set_ ## w ## _perms((wr), true , false, true ); \
			break; \
		case 0b1011: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b1100: \
			set_ ## w ## _perms((wr), true , true , false); \
			break; \
		case 0b1101: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		case 0b1110: \
			set_ ## w ## _perms((wr), true , true , true ); \
			break; \
		case 0b1111: \
			set_ ## w ## _perms((wr), false, false, false); \
			break; \
		} \
		\
		/* R_HJYGR */ \
		set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \
		\
	} while (0)

static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

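/*
 * Permissions are computed in up to three steps: base permissions
 * (direct AP/XN bits, or the S1PIE indirect index), hierarchical
 * restrictions from the table descriptors (unless HPD), and the S1POE
 * overlay, before the WXN and PAN adjustments are applied.
 */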
static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	/*
	 * Race to update a descriptor -- restart the walk.
	 */
	if (ret == -EAGAIN)
		return ret;
	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	*par = compute_par_s1(vcpu, &wi, &wr);
	return 0;
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates an S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail, mmu_cs;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 *
	 * We are also guaranteed to be in the correct context if
	 * we're not in a nested VM.
	 */
	mmu_cs = (vcpu_has_nv(vcpu) &&
		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
	if (!mmu_cs)
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (mmu_cs)
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
	int ret;

	/*
	 * If PAR_EL1 reports that AT failed on an S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par)) {
		ret = handle_at_slow(vcpu, op, vaddr, &par);
		if (ret)
			return ret;
	}

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
	return 0;
}

int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;
	int ret;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
		ret = handle_at_slow(vcpu, op, vaddr, &par);
		if (ret)
			return ret;
	}

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
	return 0;
}

int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return 0;

	/*
	 * If we only have a single stage of translation (EL2&0), exit
	 * early. Same thing if {VM,DC}=={0,0}.
	 */
	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return 0;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return ret;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
	return 0;
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}

struct desc_match {
	u64 ipa;
	int level;
};

static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
	struct desc_match *dm = priv;
	u64 ipa = dm->ipa;

	/* Use S1 granule alignment */
	ipa &= GENMASK(51, ctxt->wi->pgshift);

	/* Not the IPA we're looking for? Continue. */
	if (ipa != ctxt->table_ipa)
		return 0;

	/* Note the level and interrupt the walk */
	dm->level = ctxt->level;
	return -EINTR;
}

int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
	struct desc_match dm = {
		.ipa = ipa,
	};
	struct s1_walk_info wi = {
		.filter = &(struct s1_walk_filter){
			.fn = match_s1_desc,
			.priv = &dm,
		},
		.as_el0 = false,
		.pan = false,
	};
	struct s1_walk_result wr = {};
	int ret;

	if (is_hyp_ctxt(vcpu))
		wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	else
		wi.regime = TR_EL10;

	ret = setup_s1_walk(vcpu, &wi, &wr, va);
	if (ret)
		return ret;

	/* We really expect the S1 MMU to be on here... */
	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
		*level = 0;
		return 0;
	}

	/* Walk the guest's PT, looking for a match along the way */
	ret = walk_s1(vcpu, &wi, &wr, va);
	switch (ret) {
	case -EINTR:
		/* We interrupted the walk on a match, return the level */
		*level = dm.level;
		return 0;
	case 0:
		/* The walk completed, we failed to find the entry */
		return -ENOENT;
	default:
		/* Any other error... */
		return ret;
	}
}

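/*
 * Atomically replace a stage-1 descriptor in guest memory via its
 * userspace mapping: LSE CAS when available, load/store-exclusive
 * otherwise. A lost race (the descriptor changed under our feet) is
 * reported as -EAGAIN so that the caller can restart the walk.
 */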
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
	u64 tmp = old;
	int ret = 0;

	uaccess_enable_privileged();

	asm volatile(__LSE_PREAMBLE
		     "1: cas %[old], %[new], %[addr]\n"
		     "2:\n"
		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
		     : [new] "r" (new)
		     : "memory");

	uaccess_disable_privileged();

	if (ret)
		return ret;
	if (tmp != old)
		return -EAGAIN;

	return ret;
}

static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
	int ret = 1;
	u64 tmp;

	uaccess_enable_privileged();

	asm volatile("prfm pstl1strm, %[addr]\n"
		     "1: ldxr %[tmp], %[addr]\n"
		     "sub %[tmp], %[tmp], %[old]\n"
		     "cbnz %[tmp], 3f\n"
		     "2: stlxr %w[ret], %[new], %[addr]\n"
		     "3:\n"
		     _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
		     _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
		     : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
		     : [old] "r" (old), [new] "r" (new)
		     : "memory");

	uaccess_disable_privileged();

	/* STLXR didn't update the descriptor, or the compare failed */
	if (ret == 1)
		return -EAGAIN;

	return ret;
}

int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
{
	struct kvm_memory_slot *slot;
	unsigned long hva;
	u64 __user *ptep;
	bool writable;
	int offset;
	gfn_t gfn;
	int r;

	lockdep_assert(srcu_read_lock_held(&kvm->srcu));

	gfn = ipa >> PAGE_SHIFT;
	offset = offset_in_page(ipa);
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return -EINVAL;
	if (!writable)
		return -EPERM;

	ptep = (u64 __user *)hva + offset;
	if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
		r = __lse_swap_desc(ptep, old, new);
	else
		r = __llsc_swap_desc(ptep, old, new);

	if (r < 0)
		return r;

	mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}