GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/at.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <[email protected]>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED (-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
		break;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
		       (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
	default:
		BUG();
	}
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1_POE;
		wi->e0poe = val & TCR2_EL1_E0POE;
	}
}

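/*
 * Gather the translation controls for the regime selected in @wi
 * (SCTLR, TCR, TTBR0/1), check the VA against TBI/TxSZ/EPDx/E0PD and
 * the configured granule, and derive the walk parameters: start level,
 * table base address and maximum output size. Returns 0 on success,
 * or -EFAULT after recording an Address Size or Translation fault at
 * level 0 in @wr.
 */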
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:		/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault_l0:	/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
	return -EFAULT;
}

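/*
 * Perform the actual stage-1 table walk described by @wi: iterate from
 * the start level, optionally translating each table address through
 * the emulated stage-2 (wi->s2), reading descriptors from guest memory
 * and accumulating hierarchical attributes until a block or page
 * mapping is found. On success the descriptor, level, output address
 * and ASID information are recorded in @wr.
 */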
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

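/*
 * The set of EL1 translation-control and stage-2 registers that the
 * hardware-assisted AT path temporarily overwrites: __mmu_config_save()
 * captures the values currently loaded on the CPU so that
 * __mmu_config_restore() can put them back once the AT instruction has
 * been issued.
 */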
struct mmu_config {
	u64 ttbr0;
	u64 ttbr1;
	u64 tcr;
	u64 mair;
	u64 tcr2;
	u64 pir;
	u64 pire0;
	u64 por_el0;
	u64 por_el1;
	u64 sctlr;
	u64 vttbr;
	u64 vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

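/*
 * Issue a hardware AT S1E1{R,W}P with PSTATE.PAN temporarily set to the
 * guest's PAN value, so that the privileged-access-never check matches
 * what the guest would observe. Returns the failure status of the AT
 * instruction.
 */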
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

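/*
 * Convert a stage-2 MemAttr field into the MAIR-style 8-bit attribute
 * encoding built by the MEMATTR() macro above, so that it can be
 * compared and combined with the stage-1 attribute.
 */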
static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

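/*
 * Fold a stage-2 translation result into a stage-1 PAR_EL1 value:
 * report the S2 fault if there is one, otherwise combine the S1 and S2
 * memory attributes (honouring FWB and HCR_EL2.CD) and shareability,
 * and substitute the final output address.
 */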
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}

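/*
 * Build the PAR_EL1 value for a stage-1 only translation: a faulting
 * walk reports F/FST/PTW/S, a disabled MMU reports the VA as PA with
 * fixed attributes, and a successful walk pulls the memory attributes
 * from MAIR and the shareability from the descriptor.
 */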
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

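/*
 * Base permissions from the AP/XN bits of the descriptor when S1PIE is
 * not in use, plus the WXN and overlay-enable state for the regime.
 */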
static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

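/*
 * Permissions when S1PIE is in use: index PIR_ELx/PIRE0_ELx with the
 * permission-indirection index from the descriptor and decode the
 * selected entry with set_perms() above. Bit 3 of the entry gates
 * whether the permission overlay applies.
 */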
static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

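/*
 * Combine the direct or indirect base permissions with the
 * hierarchical ones (when not suppressed by HPD), the permission
 * overlay, WXN and PAN to obtain the effective access rights.
 */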
static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

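/*
 * Software (slow path) emulation of an AT instruction: walk the guest
 * stage-1 tables by hand, apply the permission model for the requested
 * access, and synthesise the resulting PAR_EL1 value.
 */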
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

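/*
 * Emulate AT S1E2{R,W,A} for a guest hypervisor: temporarily adjust
 * HCR_EL2 (clearing TGE, and setting NV/NV1 when vEL2 runs with E2H
 * clear), issue the corresponding S1E1 instruction, and fall back to
 * the software walker if the hardware AT could not conclude.
 */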
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

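/*
 * Emulate AT S12E{0,1}{R,W}: run the stage-1 half through
 * __kvm_at_s1e01(), then, when a stage 2 is in effect, translate the
 * resulting IPA through the emulated stage-2 tables and merge the two
 * results into the final PAR_EL1.
 */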
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (EL2&0), exit
	 * early. Same thing if {VM,DC}=={0,0}.
	 */
	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
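/*
 * Purely illustrative example (not taken from any caller in the tree):
 * a user wanting to know whether a guest EL1&0 VA is writable from EL0
 * could do something along these lines:
 *
 *	struct s1_walk_info wi = { .regime = TR_EL10, .as_el0 = true };
 *	struct s1_walk_result wr = {};
 *
 *	if (!__kvm_translate_va(vcpu, &wi, &wr, va) && wr.uw)
 *		// wr.pa holds the output address, writable from EL0
 */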
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}