GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/kvm/vsie.c
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* kvm nested virtualization support for s390x
4
*
5
* Copyright IBM Corp. 2016, 2018
6
*
7
* Author(s): David Hildenbrand <[email protected]>
8
*/
9
#include <linux/vmalloc.h>
10
#include <linux/kvm_host.h>
11
#include <linux/bug.h>
12
#include <linux/list.h>
13
#include <linux/bitmap.h>
14
#include <linux/sched/signal.h>
15
#include <linux/io.h>
16
#include <linux/mman.h>
17
18
#include <asm/gmap.h>
19
#include <asm/mmu_context.h>
20
#include <asm/sclp.h>
21
#include <asm/nmi.h>
22
#include <asm/dis.h>
23
#include <asm/facility.h>
24
#include "kvm-s390.h"
25
#include "gaccess.h"
26
27
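/*
 * VSIE_PAGE_IN_USE marks a vsie_page as claimed by a VCPU; it is taken via
 * try_get_vsie_page() and released via put_vsie_page().
 */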
enum vsie_page_flags {
28
VSIE_PAGE_IN_USE = 0,
29
};
30
31
struct vsie_page {
32
struct kvm_s390_sie_block scb_s; /* 0x0000 */
33
/*
34
* the backup info for machine check. ensure it's at
35
* the same offset as that in struct sie_page!
36
*/
37
struct mcck_volatile_info mcck_info; /* 0x0200 */
38
/*
39
* The pinned original scb. Be aware that other VCPUs can modify
40
* it while we read from it. Values that are used for conditions or
41
* are reused conditionally should be accessed via READ_ONCE.
42
*/
43
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
44
/* the shadow gmap in use by the vsie_page */
45
struct gmap *gmap; /* 0x0220 */
46
/* address of the last reported fault to guest2 */
47
unsigned long fault_addr; /* 0x0228 */
48
/* calculated guest addresses of satellite control blocks */
49
gpa_t sca_gpa; /* 0x0230 */
50
gpa_t itdba_gpa; /* 0x0238 */
51
gpa_t gvrd_gpa; /* 0x0240 */
52
gpa_t riccbd_gpa; /* 0x0248 */
53
gpa_t sdnx_gpa; /* 0x0250 */
54
/*
55
* guest address of the original SCB. Remains set for free vsie
56
* pages, so we can properly look them up in our addr_to_page
57
* radix tree.
58
*/
59
gpa_t scb_gpa; /* 0x0258 */
60
/*
61
* Flags: must be set/cleared atomically after the vsie page can be
62
* looked up by other CPUs.
63
*/
64
unsigned long flags; /* 0x0260 */
65
__u8 reserved[0x0700 - 0x0268]; /* 0x0268 */
66
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
67
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
68
};
69
70
/**
71
* gmap_shadow_valid() - check if a shadow guest address space matches the
72
* given properties and is still valid
73
* @sg: pointer to the shadow guest address space structure
74
* @asce: ASCE for which the shadow table is requested
75
* @edat_level: edat level to be used for the shadow translation
76
*
77
* Returns 1 if the gmap shadow is still valid and matches the given
78
* properties; the caller can continue using it. Returns 0 otherwise; the
79
* caller has to request a new shadow gmap in this case.
80
*/
81
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
82
{
83
if (sg->removed)
84
return 0;
85
return sg->orig_asce == asce && sg->edat_level == edat_level;
86
}
87
88
/* trigger a validity icpt for the given scb */
89
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
90
__u16 reason_code)
91
{
92
scb->ipa = 0x1000;
93
scb->ipb = ((__u32) reason_code) << 16;
94
scb->icptcode = ICPT_VALIDITY;
95
return 1;
96
}
97
98
/* mark the prefix as unmapped, this will block the VSIE */
99
static void prefix_unmapped(struct vsie_page *vsie_page)
100
{
101
atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
102
}
103
104
/* mark the prefix as unmapped and wait until the VSIE has been left */
105
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
106
{
107
prefix_unmapped(vsie_page);
108
if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
109
atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
110
while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
111
cpu_relax();
112
}
113
114
/* mark the prefix as mapped, this will allow the VSIE to run */
115
static void prefix_mapped(struct vsie_page *vsie_page)
116
{
117
atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
118
}
119
120
/* test if the prefix is mapped into the gmap shadow */
121
static int prefix_is_mapped(struct vsie_page *vsie_page)
122
{
123
return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
124
}
125
126
/* copy the updated intervention request bits into the shadow scb */
127
static void update_intervention_requests(struct vsie_page *vsie_page)
128
{
129
const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
130
int cpuflags;
131
132
cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
133
atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
134
atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
135
}
136
137
/* shadow (filter and validate) the cpuflags */
138
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
139
{
140
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
141
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
142
int newflags, cpuflags = atomic_read(&scb_o->cpuflags);
143
144
/* we don't allow ESA/390 guests */
145
if (!(cpuflags & CPUSTAT_ZARCH))
146
return set_validity_icpt(scb_s, 0x0001U);
147
148
if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
149
return set_validity_icpt(scb_s, 0x0001U);
150
else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
151
return set_validity_icpt(scb_s, 0x0007U);
152
153
/* intervention requests will be set later */
154
newflags = CPUSTAT_ZARCH;
155
if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
156
newflags |= CPUSTAT_GED;
157
if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
158
if (cpuflags & CPUSTAT_GED)
159
return set_validity_icpt(scb_s, 0x0001U);
160
newflags |= CPUSTAT_GED2;
161
}
162
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
163
newflags |= cpuflags & CPUSTAT_P;
164
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
165
newflags |= cpuflags & CPUSTAT_SM;
166
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
167
newflags |= cpuflags & CPUSTAT_IBS;
168
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
169
newflags |= cpuflags & CPUSTAT_KSS;
170
171
atomic_set(&scb_s->cpuflags, newflags);
172
return 0;
173
}
174
/* Copy to APCB FORMAT1 from APCB FORMAT0 */
175
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
176
unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
177
{
178
struct kvm_s390_apcb0 tmp;
179
unsigned long apcb_gpa;
180
181
apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
182
183
if (read_guest_real(vcpu, apcb_gpa, &tmp,
184
sizeof(struct kvm_s390_apcb0)))
185
return -EFAULT;
186
187
apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
188
apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
189
apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
190
191
return 0;
192
193
}
194
195
/**
196
* setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
197
* @vcpu: pointer to the virtual CPU
198
* @apcb_s: pointer to start of apcb in the shadow crycb
199
* @crycb_gpa: guest physical address to start of original guest crycb
200
* @apcb_h: pointer to start of apcb in the guest1
201
*
202
* Returns 0 on success and -EFAULT on error reading the guest apcb
203
*/
204
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
205
unsigned long crycb_gpa, unsigned long *apcb_h)
206
{
207
unsigned long apcb_gpa;
208
209
apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);
210
211
if (read_guest_real(vcpu, apcb_gpa, apcb_s,
212
sizeof(struct kvm_s390_apcb0)))
213
return -EFAULT;
214
215
bitmap_and(apcb_s, apcb_s, apcb_h,
216
BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));
217
218
return 0;
219
}
220
221
/**
222
* setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
223
* @vcpu: pointer to the virtual CPU
224
* @apcb_s: pointer to start of apcb in the shadow crycb
225
* @crycb_gpa: guest physical address to start of original guest crycb
226
* @apcb_h: pointer to start of apcb in the host
227
*
228
* Returns 0 on success and -EFAULT on error reading the guest apcb
229
*/
230
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
231
unsigned long crycb_gpa,
232
unsigned long *apcb_h)
233
{
234
unsigned long apcb_gpa;
235
236
apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);
237
238
if (read_guest_real(vcpu, apcb_gpa, apcb_s,
239
sizeof(struct kvm_s390_apcb1)))
240
return -EFAULT;
241
242
bitmap_and(apcb_s, apcb_s, apcb_h,
243
BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));
244
245
return 0;
246
}
247
248
/**
249
* setup_apcb - Create a shadow copy of the apcb.
250
* @vcpu: pointer to the virtual CPU
251
* @crycb_s: pointer to shadow crycb
252
* @crycb_gpa: guest physical address of original guest crycb
253
* @crycb_h: pointer to the host crycb
254
* @fmt_o: format of the original guest crycb.
255
* @fmt_h: format of the host crycb.
256
*
257
* Checks the compatibility between the guest and host crycb and calls the
258
* appropriate copy function.
259
*
260
* Returns 0 on success or an error number if the guest and host crycb are incompatible.
261
*/
262
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
263
const u32 crycb_gpa,
264
struct kvm_s390_crypto_cb *crycb_h,
265
int fmt_o, int fmt_h)
266
{
267
switch (fmt_o) {
268
case CRYCB_FORMAT2:
269
if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
270
return -EACCES;
271
if (fmt_h != CRYCB_FORMAT2)
272
return -EINVAL;
273
return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
274
crycb_gpa,
275
(unsigned long *)&crycb_h->apcb1);
276
case CRYCB_FORMAT1:
277
switch (fmt_h) {
278
case CRYCB_FORMAT2:
279
return setup_apcb10(vcpu, &crycb_s->apcb1,
280
crycb_gpa,
281
&crycb_h->apcb1);
282
case CRYCB_FORMAT1:
283
return setup_apcb00(vcpu,
284
(unsigned long *) &crycb_s->apcb0,
285
crycb_gpa,
286
(unsigned long *) &crycb_h->apcb0);
287
}
288
break;
289
case CRYCB_FORMAT0:
290
if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
291
return -EACCES;
292
293
switch (fmt_h) {
294
case CRYCB_FORMAT2:
295
return setup_apcb10(vcpu, &crycb_s->apcb1,
296
crycb_gpa,
297
&crycb_h->apcb1);
298
case CRYCB_FORMAT1:
299
case CRYCB_FORMAT0:
300
return setup_apcb00(vcpu,
301
(unsigned long *) &crycb_s->apcb0,
302
crycb_gpa,
303
(unsigned long *) &crycb_h->apcb0);
304
}
305
}
306
return -EINVAL;
307
}
308
309
/**
310
* shadow_crycb - Create a shadow copy of the crycb block
311
* @vcpu: a pointer to the virtual CPU
312
* @vsie_page: a pointer to internal data used for the vSIE
313
*
314
* Create a shadow copy of the crycb block and setup key wrapping, if
315
* requested for guest 3 and enabled for guest 2.
316
*
317
* We accept format-1 or format-2, but we convert format-1 into format-2
318
* in the shadow CRYCB.
319
* Using format-2 enables the firmware to choose the right format when
320
* scheduling the SIE.
321
* There is nothing to do for format-0.
322
*
323
* This function centralizes the issuing of set_validity_icpt() for all
324
* the subfunctions working on the crycb.
325
*
326
* Returns: - 0 if shadowed or nothing to do
327
* - > 0 if control has to be given to guest 2
328
*/
329
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
330
{
331
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
332
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
333
const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
334
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
335
unsigned long *b1, *b2;
336
u8 ecb3_flags;
337
u32 ecd_flags;
338
int apie_h;
339
int apie_s;
340
int key_msk = test_kvm_facility(vcpu->kvm, 76);
341
int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
342
int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
343
int ret = 0;
344
345
scb_s->crycbd = 0;
346
347
apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
348
apie_s = apie_h & scb_o->eca;
349
if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
350
return 0;
351
352
if (!crycb_addr)
353
return set_validity_icpt(scb_s, 0x0039U);
354
355
if (fmt_o == CRYCB_FORMAT1)
356
if ((crycb_addr & PAGE_MASK) !=
357
((crycb_addr + 128) & PAGE_MASK))
358
return set_validity_icpt(scb_s, 0x003CU);
359
360
if (apie_s) {
361
ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
362
vcpu->kvm->arch.crypto.crycb,
363
fmt_o, fmt_h);
364
if (ret)
365
goto end;
366
scb_s->eca |= scb_o->eca & ECA_APIE;
367
}
368
369
/* we may only allow it if enabled for guest 2 */
370
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
371
(ECB3_AES | ECB3_DEA);
372
ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd &
373
(ECD_ECC | ECD_HMAC);
374
if (!ecb3_flags && !ecd_flags)
375
goto end;
376
377
/* copy only the wrapping keys */
378
if (read_guest_real(vcpu, crycb_addr + 72,
379
vsie_page->crycb.dea_wrapping_key_mask, 56))
380
return set_validity_icpt(scb_s, 0x0035U);
381
382
scb_s->ecb3 |= ecb3_flags;
383
scb_s->ecd |= ecd_flags;
384
385
/* xor both blocks in one run */
386
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
387
b2 = (unsigned long *)
388
vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
389
/* as 56%8 == 0, bitmap_xor won't overwrite any data */
390
bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
391
end:
392
switch (ret) {
393
case -EINVAL:
394
return set_validity_icpt(scb_s, 0x0022U);
395
case -EFAULT:
396
return set_validity_icpt(scb_s, 0x0035U);
397
case -EACCES:
398
return set_validity_icpt(scb_s, 0x003CU);
399
}
400
scb_s->crycbd = (u32)virt_to_phys(&vsie_page->crycb) | CRYCB_FORMAT2;
401
return 0;
402
}
403
404
/* shadow (round up/down) the ibc to avoid validity icpt */
405
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
406
{
407
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
408
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
409
/* READ_ONCE does not work on bitfields - use a temporary variable */
410
const uint32_t __new_ibc = scb_o->ibc;
411
const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
412
__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
413
414
scb_s->ibc = 0;
415
/* ibc installed in g2 and requested for g3 */
416
if (vcpu->kvm->arch.model.ibc && new_ibc) {
417
scb_s->ibc = new_ibc;
418
/* takte care of the minimum ibc level of the machine */
419
if (scb_s->ibc < min_ibc)
420
scb_s->ibc = min_ibc;
421
/* take care of the maximum ibc level set for the guest */
422
if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
423
scb_s->ibc = vcpu->kvm->arch.model.ibc;
424
}
425
}
426
427
/* unshadow the scb, copying parameters back to the real scb */
428
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
429
{
430
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
431
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
432
433
/* interception */
434
scb_o->icptcode = scb_s->icptcode;
435
scb_o->icptstatus = scb_s->icptstatus;
436
scb_o->ipa = scb_s->ipa;
437
scb_o->ipb = scb_s->ipb;
438
scb_o->gbea = scb_s->gbea;
439
440
/* timer */
441
scb_o->cputm = scb_s->cputm;
442
scb_o->ckc = scb_s->ckc;
443
scb_o->todpr = scb_s->todpr;
444
445
/* guest state */
446
scb_o->gpsw = scb_s->gpsw;
447
scb_o->gg14 = scb_s->gg14;
448
scb_o->gg15 = scb_s->gg15;
449
memcpy(scb_o->gcr, scb_s->gcr, 128);
450
scb_o->pp = scb_s->pp;
451
452
/* branch prediction */
453
if (test_kvm_facility(vcpu->kvm, 82)) {
454
scb_o->fpf &= ~FPF_BPBC;
455
scb_o->fpf |= scb_s->fpf & FPF_BPBC;
456
}
457
458
/* interrupt intercept */
459
switch (scb_s->icptcode) {
460
case ICPT_PROGI:
461
case ICPT_INSTPROGI:
462
case ICPT_EXTINT:
463
memcpy((void *)((u64)scb_o + 0xc0),
464
(void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
465
break;
466
}
467
468
if (scb_s->ihcpu != 0xffffU)
469
scb_o->ihcpu = scb_s->ihcpu;
470
}
471
472
/*
473
* Setup the shadow scb by copying and checking the relevant parts of the g2
474
* provided scb.
475
*
476
* Returns: - 0 if the scb has been shadowed
477
* - > 0 if control has to be given to guest 2
478
*/
479
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
480
{
481
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
482
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
483
/* READ_ONCE does not work on bitfields - use a temporary variable */
484
const uint32_t __new_prefix = scb_o->prefix;
485
const uint32_t new_prefix = READ_ONCE(__new_prefix);
486
const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
487
bool had_tx = scb_s->ecb & ECB_TE;
488
unsigned long new_mso = 0;
489
int rc;
490
491
/* make sure we don't have any leftovers when reusing the scb */
492
scb_s->icptcode = 0;
493
scb_s->eca = 0;
494
scb_s->ecb = 0;
495
scb_s->ecb2 = 0;
496
scb_s->ecb3 = 0;
497
scb_s->ecd = 0;
498
scb_s->fac = 0;
499
scb_s->fpf = 0;
500
501
rc = prepare_cpuflags(vcpu, vsie_page);
502
if (rc)
503
goto out;
504
505
/* timer */
506
scb_s->cputm = scb_o->cputm;
507
scb_s->ckc = scb_o->ckc;
508
scb_s->todpr = scb_o->todpr;
509
scb_s->epoch = scb_o->epoch;
510
511
/* guest state */
512
scb_s->gpsw = scb_o->gpsw;
513
scb_s->gg14 = scb_o->gg14;
514
scb_s->gg15 = scb_o->gg15;
515
memcpy(scb_s->gcr, scb_o->gcr, 128);
516
scb_s->pp = scb_o->pp;
517
518
/* interception / execution handling */
519
scb_s->gbea = scb_o->gbea;
520
scb_s->lctl = scb_o->lctl;
521
scb_s->svcc = scb_o->svcc;
522
scb_s->ictl = scb_o->ictl;
523
/*
524
* SKEY handling functions can't deal with false setting of PTE invalid
525
* bits. Therefore we cannot provide interpretation and would later
526
* have to provide our own emulation handlers.
527
*/
528
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
529
scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
530
531
scb_s->icpua = scb_o->icpua;
532
533
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
534
new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
535
/* if the hva of the prefix changes, we have to remap the prefix */
536
if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
537
prefix_unmapped(vsie_page);
538
/* SIE will do mso/msl validity and exception checks for us */
539
scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
540
scb_s->mso = new_mso;
541
scb_s->prefix = new_prefix;
542
543
/* We have to definitely flush the tlb if this scb never ran */
544
if (scb_s->ihcpu != 0xffffU)
545
scb_s->ihcpu = scb_o->ihcpu;
546
547
/* MVPG and Protection Exception Interpretation are always available */
548
scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
549
/* Host-protection-interruption introduced with ESOP */
550
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
551
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
552
/*
553
* CPU Topology
554
* This facility only uses the utility field of the SCA and none of
555
* the cpu entries that are problematic with the other interpretation
556
* facilities so we can pass it through
557
*/
558
if (test_kvm_facility(vcpu->kvm, 11))
559
scb_s->ecb |= scb_o->ecb & ECB_PTF;
560
/* transactional execution */
561
if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
562
/* remap the prefix if tx is toggled on */
563
if (!had_tx)
564
prefix_unmapped(vsie_page);
565
scb_s->ecb |= ECB_TE;
566
}
567
/* specification exception interpretation */
568
scb_s->ecb |= scb_o->ecb & ECB_SPECI;
569
/* branch prediction */
570
if (test_kvm_facility(vcpu->kvm, 82))
571
scb_s->fpf |= scb_o->fpf & FPF_BPBC;
572
/* SIMD */
573
if (test_kvm_facility(vcpu->kvm, 129)) {
574
scb_s->eca |= scb_o->eca & ECA_VX;
575
scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
576
}
577
/* Run-time-Instrumentation */
578
if (test_kvm_facility(vcpu->kvm, 64))
579
scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
580
/* Instruction Execution Prevention */
581
if (test_kvm_facility(vcpu->kvm, 130))
582
scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
583
/* Guarded Storage */
584
if (test_kvm_facility(vcpu->kvm, 133)) {
585
scb_s->ecb |= scb_o->ecb & ECB_GS;
586
scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
587
}
588
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
589
scb_s->eca |= scb_o->eca & ECA_SII;
590
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
591
scb_s->eca |= scb_o->eca & ECA_IB;
592
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
593
scb_s->eca |= scb_o->eca & ECA_CEI;
594
/* Epoch Extension */
595
if (test_kvm_facility(vcpu->kvm, 139)) {
596
scb_s->ecd |= scb_o->ecd & ECD_MEF;
597
scb_s->epdx = scb_o->epdx;
598
}
599
600
/* etoken */
601
if (test_kvm_facility(vcpu->kvm, 156))
602
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
603
604
scb_s->hpid = HPID_VSIE;
605
scb_s->cpnc = scb_o->cpnc;
606
607
prepare_ibc(vcpu, vsie_page);
608
rc = shadow_crycb(vcpu, vsie_page);
609
out:
610
if (rc)
611
unshadow_scb(vcpu, vsie_page);
612
return rc;
613
}
614
615
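/*
 * Notifier for invalidations of a shadow gmap: if the invalidated range
 * [start, end] overlaps the shadowed prefix pages of a vsie_page that uses
 * this gmap, mark the prefix as unmapped and wait until the vSIE was left.
 */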
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
616
unsigned long end)
617
{
618
struct kvm *kvm = gmap->private;
619
struct vsie_page *cur;
620
unsigned long prefix;
621
int i;
622
623
if (!gmap_is_shadow(gmap))
624
return;
625
/*
626
* Only new shadow blocks are added to the list during runtime,
627
* therefore we can safely reference them all the time.
628
*/
629
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
630
cur = READ_ONCE(kvm->arch.vsie.pages[i]);
631
if (!cur)
632
continue;
633
if (READ_ONCE(cur->gmap) != gmap)
634
continue;
635
prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
636
/* with mso/msl, the prefix lies at an offset */
637
prefix += cur->scb_s.mso;
638
if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
639
prefix_unmapped_sync(cur);
640
}
641
}
642
643
/*
644
* Map the first prefix page and if tx is enabled also the second prefix page.
645
*
646
* The prefix will be protected, a gmap notifier will inform about unmaps.
647
* The shadow scb must not be executed until the prefix is remapped, this is
648
* guaranteed by properly handling PROG_REQUEST.
649
*
650
* Returns: - 0 if successfully mapped or already mapped
651
* - > 0 if control has to be given to guest 2
652
* - -EAGAIN if the caller can retry immediately
653
* - -ENOMEM if out of memory
654
*/
655
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
656
{
657
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
658
u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
659
int rc;
660
661
if (prefix_is_mapped(vsie_page))
662
return 0;
663
664
/* mark it as mapped so we can catch any concurrent unmappers */
665
prefix_mapped(vsie_page);
666
667
/* with mso/msl, the prefix lies at offset *mso* */
668
prefix += scb_s->mso;
669
670
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
671
if (!rc && (scb_s->ecb & ECB_TE))
672
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
673
prefix + PAGE_SIZE, NULL);
674
/*
675
* We don't have to mprotect; we will be called for all unshadows.
676
* SIE will detect if protection applies and trigger a validity.
677
*/
678
if (rc)
679
prefix_unmapped(vsie_page);
680
if (rc > 0 || rc == -EFAULT)
681
rc = set_validity_icpt(scb_s, 0x0037U);
682
return rc;
683
}
684
685
/*
686
* Pin the guest page given by gpa and set hpa to the pinned host address.
687
* Will always be pinned writable.
688
*
689
* Returns: - 0 on success
690
* - -EINVAL if the gpa is not valid guest storage
691
*/
692
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
693
{
694
struct page *page;
695
696
page = gfn_to_page(kvm, gpa_to_gfn(gpa));
697
if (!page)
698
return -EINVAL;
699
*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
700
return 0;
701
}
702
703
/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
704
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
705
{
706
kvm_release_page_dirty(pfn_to_page(hpa >> PAGE_SHIFT));
707
/* mark the page always as dirty for migration */
708
mark_page_dirty(kvm, gpa_to_gfn(gpa));
709
}
710
711
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
712
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
713
{
714
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
715
hpa_t hpa;
716
717
hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
718
if (hpa) {
719
unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
720
vsie_page->sca_gpa = 0;
721
scb_s->scaol = 0;
722
scb_s->scaoh = 0;
723
}
724
725
hpa = scb_s->itdba;
726
if (hpa) {
727
unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
728
vsie_page->itdba_gpa = 0;
729
scb_s->itdba = 0;
730
}
731
732
hpa = scb_s->gvrd;
733
if (hpa) {
734
unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
735
vsie_page->gvrd_gpa = 0;
736
scb_s->gvrd = 0;
737
}
738
739
hpa = scb_s->riccbd;
740
if (hpa) {
741
unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
742
vsie_page->riccbd_gpa = 0;
743
scb_s->riccbd = 0;
744
}
745
746
hpa = scb_s->sdnxo;
747
if (hpa) {
748
unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
749
vsie_page->sdnx_gpa = 0;
750
scb_s->sdnxo = 0;
751
}
752
}
753
754
/*
755
* Instead of shadowing some blocks, we can simply forward them because the
756
* addresses in the scb are 64 bit long.
757
*
758
* This works as long as the data lies in one page. If blocks ever exceed one
759
* page, we have to fall back to shadowing.
760
*
761
* As we reuse the sca, the vcpu pointers contained in it are invalid. We must
762
* therefore not enable any facilities that access these pointers (e.g. SIGPIF).
763
*
764
* Returns: - 0 if all blocks were pinned.
765
* - > 0 if control has to be given to guest 2
766
* - -ENOMEM if out of memory
767
*/
768
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
769
{
770
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
771
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
772
hpa_t hpa;
773
gpa_t gpa;
774
int rc = 0;
775
776
gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
777
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
778
gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
779
if (gpa) {
780
if (gpa < 2 * PAGE_SIZE)
781
rc = set_validity_icpt(scb_s, 0x0038U);
782
else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
783
rc = set_validity_icpt(scb_s, 0x0011U);
784
else if ((gpa & PAGE_MASK) !=
785
((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
786
rc = set_validity_icpt(scb_s, 0x003bU);
787
if (!rc) {
788
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
789
if (rc)
790
rc = set_validity_icpt(scb_s, 0x0034U);
791
}
792
if (rc)
793
goto unpin;
794
vsie_page->sca_gpa = gpa;
795
scb_s->scaoh = (u32)((u64)hpa >> 32);
796
scb_s->scaol = (u32)(u64)hpa;
797
}
798
799
gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
800
if (gpa && (scb_s->ecb & ECB_TE)) {
801
if (gpa < 2 * PAGE_SIZE) {
802
rc = set_validity_icpt(scb_s, 0x0080U);
803
goto unpin;
804
}
805
/* 256 bytes cannot cross page boundaries */
806
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
807
if (rc) {
808
rc = set_validity_icpt(scb_s, 0x0080U);
809
goto unpin;
810
}
811
vsie_page->itdba_gpa = gpa;
812
scb_s->itdba = hpa;
813
}
814
815
gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
816
if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
817
if (gpa < 2 * PAGE_SIZE) {
818
rc = set_validity_icpt(scb_s, 0x1310U);
819
goto unpin;
820
}
821
/*
822
* 512 bytes vector registers cannot cross page boundaries
823
* if this block gets bigger, we have to shadow it.
824
*/
825
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
826
if (rc) {
827
rc = set_validity_icpt(scb_s, 0x1310U);
828
goto unpin;
829
}
830
vsie_page->gvrd_gpa = gpa;
831
scb_s->gvrd = hpa;
832
}
833
834
gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
835
if (gpa && (scb_s->ecb3 & ECB3_RI)) {
836
if (gpa < 2 * PAGE_SIZE) {
837
rc = set_validity_icpt(scb_s, 0x0043U);
838
goto unpin;
839
}
840
/* 64 bytes cannot cross page boundaries */
841
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
842
if (rc) {
843
rc = set_validity_icpt(scb_s, 0x0043U);
844
goto unpin;
845
}
846
/* Validity 0x0044 will be checked by SIE */
847
vsie_page->riccbd_gpa = gpa;
848
scb_s->riccbd = hpa;
849
}
850
if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
851
(scb_s->ecd & ECD_ETOKENF)) {
852
unsigned long sdnxc;
853
854
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
855
sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
856
if (!gpa || gpa < 2 * PAGE_SIZE) {
857
rc = set_validity_icpt(scb_s, 0x10b0U);
858
goto unpin;
859
}
860
if (sdnxc < 6 || sdnxc > 12) {
861
rc = set_validity_icpt(scb_s, 0x10b1U);
862
goto unpin;
863
}
864
if (gpa & ((1 << sdnxc) - 1)) {
865
rc = set_validity_icpt(scb_s, 0x10b2U);
866
goto unpin;
867
}
868
/* Due to alignment rules (checked above) this cannot
869
* cross page boundaries
870
*/
871
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
872
if (rc) {
873
rc = set_validity_icpt(scb_s, 0x10b0U);
874
goto unpin;
875
}
876
vsie_page->sdnx_gpa = gpa;
877
scb_s->sdnxo = hpa | sdnxc;
878
}
879
return 0;
880
unpin:
881
unpin_blocks(vcpu, vsie_page);
882
return rc;
883
}
884
885
/* unpin the scb provided by guest 2, marking it as dirty */
886
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
887
gpa_t gpa)
888
{
889
hpa_t hpa = virt_to_phys(vsie_page->scb_o);
890
891
if (hpa)
892
unpin_guest_page(vcpu->kvm, gpa, hpa);
893
vsie_page->scb_o = NULL;
894
}
895
896
/*
897
* Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
898
*
899
* Returns: - 0 if the scb was pinned.
900
* - > 0 if control has to be given to guest 2
901
*/
902
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
903
gpa_t gpa)
904
{
905
hpa_t hpa;
906
int rc;
907
908
rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
909
if (rc) {
910
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
911
WARN_ON_ONCE(rc);
912
return 1;
913
}
914
vsie_page->scb_o = phys_to_virt(hpa);
915
return 0;
916
}
917
918
/*
919
* Inject a fault into guest 2.
920
*
921
* Returns: - > 0 if control has to be given to guest 2
922
* - < 0 if an error occurred during injection.
923
*/
924
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
925
bool write_flag)
926
{
927
struct kvm_s390_pgm_info pgm = {
928
.code = code,
929
.trans_exc_code =
930
/* 0-51: virtual address */
931
(vaddr & 0xfffffffffffff000UL) |
932
/* 52-53: store / fetch */
933
(((unsigned int) !write_flag) + 1) << 10,
934
/* 62-63: asce id (always primary == 0) */
935
.exc_access_id = 0, /* always primary */
936
.op_access_id = 0, /* not MVPG */
937
};
938
int rc;
939
940
if (code == PGM_PROTECTION)
941
pgm.trans_exc_code |= 0x4UL;
942
943
rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
944
return rc ? rc : 1;
945
}
946
947
/*
948
* Handle a fault during vsie execution on a gmap shadow.
949
*
950
* Returns: - 0 if the fault was resolved
951
* - > 0 if control has to be given to guest 2
952
* - < 0 if an error occurred
953
*/
954
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
955
{
956
int rc;
957
958
if ((current->thread.gmap_int_code & PGM_INT_CODE_MASK) == PGM_PROTECTION)
959
/* we can directly forward all protection exceptions */
960
return inject_fault(vcpu, PGM_PROTECTION,
961
current->thread.gmap_teid.addr * PAGE_SIZE, 1);
962
963
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
964
current->thread.gmap_teid.addr * PAGE_SIZE, NULL);
965
if (rc > 0) {
966
rc = inject_fault(vcpu, rc,
967
current->thread.gmap_teid.addr * PAGE_SIZE,
968
kvm_s390_cur_gmap_fault_is_write());
969
if (rc >= 0)
970
vsie_page->fault_addr = current->thread.gmap_teid.addr * PAGE_SIZE;
971
}
972
return rc;
973
}
974
975
/*
976
* Retry the previous fault that required guest 2 intervention. This avoids
977
* one superfluous SIE re-entry and direct exit.
978
*
979
* Will ignore any errors. The next SIE fault will do proper fault handling.
980
*/
981
static void handle_last_fault(struct kvm_vcpu *vcpu,
982
struct vsie_page *vsie_page)
983
{
984
if (vsie_page->fault_addr)
985
kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
986
vsie_page->fault_addr, NULL);
987
vsie_page->fault_addr = 0;
988
}
989
990
static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
991
{
992
vsie_page->scb_s.icptcode = 0;
993
}
994
995
/* rewind the psw and clear the vsie icpt, so we can retry execution */
996
static void retry_vsie_icpt(struct vsie_page *vsie_page)
997
{
998
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
999
int ilen = insn_length(scb_s->ipa >> 8);
1000
1001
/* take care of EXECUTE instructions */
1002
if (scb_s->icptstatus & 1) {
1003
ilen = (scb_s->icptstatus >> 4) & 0x6;
1004
if (!ilen)
1005
ilen = 4;
1006
}
1007
scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
1008
clear_vsie_icpt(vsie_page);
1009
}
1010
1011
/*
1012
* Try to shadow + enable the guest 2 provided facility list.
1013
* Retry instruction execution if enabled for and provided by guest 2.
1014
*
1015
* Returns: - 0 if handled (retry or guest 2 icpt)
1016
* - > 0 if control has to be given to guest 2
1017
*/
1018
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
1019
{
1020
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1021
__u32 fac = READ_ONCE(vsie_page->scb_o->fac);
1022
1023
/*
1024
* Alternate-STFLE-Interpretive-Execution facilities are not supported
1025
* -> format-0 flcb
1026
*/
1027
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
1028
retry_vsie_icpt(vsie_page);
1029
/*
1030
* The facility list origin (FLO) is in bits 1 - 28 of the FLD
1031
* so we need to mask here before reading.
1032
*/
1033
fac = fac & 0x7ffffff8U;
1034
/*
1035
* format-0 -> size of nested guest's facility list == guest's size
1036
* guest's size == host's size, since STFLE is interpretatively executed
1037
* using a format-0 for the guest, too.
1038
*/
1039
if (read_guest_real(vcpu, fac, &vsie_page->fac,
1040
stfle_size() * sizeof(u64)))
1041
return set_validity_icpt(scb_s, 0x1090U);
1042
scb_s->fac = (u32)virt_to_phys(&vsie_page->fac);
1043
}
1044
return 0;
1045
}
1046
1047
/*
1048
* Get a register for a nested guest.
1049
* @vcpu the vcpu of the guest
1050
* @vsie_page the vsie_page for the nested guest
1051
* @reg the register number, the upper 4 bits are ignored.
1052
* returns: the value of the register.
1053
*/
1054
static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
1055
{
1056
/* no need to validate the parameter and/or perform error handling */
1057
reg &= 0xf;
1058
switch (reg) {
1059
case 15:
1060
return vsie_page->scb_s.gg15;
1061
case 14:
1062
return vsie_page->scb_s.gg14;
1063
default:
1064
return vcpu->run->s.regs.gprs[reg];
1065
}
1066
}
1067
1068
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
1069
{
1070
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1071
unsigned long pei_dest, pei_src, src, dest, mask, prefix;
1072
u64 *pei_block = &vsie_page->scb_o->mcic;
1073
int edat, rc_dest, rc_src;
1074
union ctlreg0 cr0;
1075
1076
cr0.val = vcpu->arch.sie_block->gcr[0];
1077
edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
1078
mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
1079
prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
1080
1081
dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
1082
dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
1083
src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
1084
src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
1085
1086
rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
1087
rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
1088
/*
1089
* Either everything went well, or something non-critical went wrong
1090
* e.g. because of a race. In either case, simply retry.
1091
*/
1092
if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
1093
retry_vsie_icpt(vsie_page);
1094
return -EAGAIN;
1095
}
1096
/* Something more serious went wrong, propagate the error */
1097
if (rc_dest < 0)
1098
return rc_dest;
1099
if (rc_src < 0)
1100
return rc_src;
1101
1102
/* The only possible suppressing exception: just deliver it */
1103
if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
1104
clear_vsie_icpt(vsie_page);
1105
rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
1106
WARN_ON_ONCE(rc_dest);
1107
return 1;
1108
}
1109
1110
/*
1111
* Forward the PEI intercept to the guest if it was a page fault, or
1112
* also for segment and region table faults if EDAT applies.
1113
*/
1114
if (edat) {
1115
rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
1116
rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
1117
} else {
1118
rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
1119
rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
1120
}
1121
if (!rc_dest && !rc_src) {
1122
pei_block[0] = pei_dest;
1123
pei_block[1] = pei_src;
1124
return 1;
1125
}
1126
1127
retry_vsie_icpt(vsie_page);
1128
1129
/*
1130
* The host has edat, and the guest does not, or it was an ASCE type
1131
* exception. The host needs to inject the appropriate DAT interrupts
1132
* into the guest.
1133
*/
1134
if (rc_dest)
1135
return inject_fault(vcpu, rc_dest, dest, 1);
1136
return inject_fault(vcpu, rc_src, src, 0);
1137
}
1138
1139
/*
1140
* Run the vsie on a shadow scb and a shadow gmap, without any further
1141
* sanity checks, handling SIE faults.
1142
*
1143
* Returns: - 0 everything went fine
1144
* - > 0 if control has to be given to guest 2
1145
* - < 0 if an error occurred
1146
*/
1147
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
1148
__releases(vcpu->kvm->srcu)
1149
__acquires(vcpu->kvm->srcu)
1150
{
1151
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1152
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
1153
int guest_bp_isolation;
1154
int rc = 0;
1155
1156
handle_last_fault(vcpu, vsie_page);
1157
1158
kvm_vcpu_srcu_read_unlock(vcpu);
1159
1160
/* save current guest state of bp isolation override */
1161
guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);
1162
1163
/*
1164
* The guest is running with BPBC, so we have to force it on for our
1165
* nested guest. This is done by enabling BPBC globally, so the BPBC
1166
* control in the SCB (which the nested guest can modify) is simply
1167
* ignored.
1168
*/
1169
if (test_kvm_facility(vcpu->kvm, 82) &&
1170
vcpu->arch.sie_block->fpf & FPF_BPBC)
1171
set_thread_flag(TIF_ISOLATE_BP_GUEST);
1172
1173
/*
1174
* Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
1175
* and VCPU requests also hinder the vSIE from running and lead
1176
* to an immediate exit. kvm_s390_vsie_kick() has to be used to
1177
* also kick the vSIE.
1178
*/
1179
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
1180
current->thread.gmap_int_code = 0;
1181
barrier();
1182
if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
1183
local_irq_disable();
1184
guest_timing_enter_irqoff();
1185
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
1186
guest_timing_exit_irqoff();
1187
local_irq_enable();
1188
}
1189
barrier();
1190
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
1191
1192
/* restore guest state for bp isolation override */
1193
if (!guest_bp_isolation)
1194
clear_thread_flag(TIF_ISOLATE_BP_GUEST);
1195
1196
kvm_vcpu_srcu_read_lock(vcpu);
1197
1198
if (rc == -EINTR) {
1199
VCPU_EVENT(vcpu, 3, "%s", "machine check");
1200
kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
1201
return 0;
1202
}
1203
1204
if (rc > 0)
1205
rc = 0; /* we could still have an icpt */
1206
else if (current->thread.gmap_int_code)
1207
return handle_fault(vcpu, vsie_page);
1208
1209
switch (scb_s->icptcode) {
1210
case ICPT_INST:
1211
if (scb_s->ipa == 0xb2b0)
1212
rc = handle_stfle(vcpu, vsie_page);
1213
break;
1214
case ICPT_STOP:
1215
/* stop not requested by g2 - must have been a kick */
1216
if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
1217
clear_vsie_icpt(vsie_page);
1218
break;
1219
case ICPT_VALIDITY:
1220
if ((scb_s->ipa & 0xf000) != 0xf000)
1221
scb_s->ipa += 0x1000;
1222
break;
1223
case ICPT_PARTEXEC:
1224
if (scb_s->ipa == 0xb254)
1225
rc = vsie_handle_mvpg(vcpu, vsie_page);
1226
break;
1227
}
1228
return rc;
1229
}
1230
1231
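/* release the shadow gmap (if any) and mark the prefix as unmapped */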
static void release_gmap_shadow(struct vsie_page *vsie_page)
1232
{
1233
if (vsie_page->gmap)
1234
gmap_put(vsie_page->gmap);
1235
WRITE_ONCE(vsie_page->gmap, NULL);
1236
prefix_unmapped(vsie_page);
1237
}
1238
1239
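/*
 * Provide a shadow gmap for the current guest-3 ASCE and EDAT configuration:
 * reuse the existing one if it is still valid, otherwise release it and
 * create a new shadow via gmap_shadow().
 */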
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
1240
struct vsie_page *vsie_page)
1241
{
1242
unsigned long asce;
1243
union ctlreg0 cr0;
1244
struct gmap *gmap;
1245
int edat;
1246
1247
asce = vcpu->arch.sie_block->gcr[1];
1248
cr0.val = vcpu->arch.sie_block->gcr[0];
1249
edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
1250
edat += edat && test_kvm_facility(vcpu->kvm, 78);
1251
1252
/*
1253
* ASCE or EDAT could have changed since last icpt, or the gmap
1254
* we're holding has been unshadowed. If the gmap is still valid,
1255
* we can safely reuse it.
1256
*/
1257
if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
1258
vcpu->kvm->stat.gmap_shadow_reuse++;
1259
return 0;
1260
}
1261
1262
/* release the old shadow - if any, and mark the prefix as unmapped */
1263
release_gmap_shadow(vsie_page);
1264
gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
1265
if (IS_ERR(gmap))
1266
return PTR_ERR(gmap);
1267
vcpu->kvm->stat.gmap_shadow_create++;
1268
WRITE_ONCE(vsie_page->gmap, gmap);
1269
return 0;
1270
}
1271
1272
/*
1273
* Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
1274
*/
1275
static void register_shadow_scb(struct kvm_vcpu *vcpu,
1276
struct vsie_page *vsie_page)
1277
{
1278
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1279
1280
WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
1281
/*
1282
* External calls have to lead to a kick of the vcpu and
1283
* therefore the vsie -> Simulate Wait state.
1284
*/
1285
kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
1286
/*
1287
* We have to adjust the g3 epoch by the g2 epoch. The epoch will
1288
* automatically be adjusted on tod clock changes via kvm_sync_clock.
1289
*/
1290
preempt_disable();
1291
scb_s->epoch += vcpu->kvm->arch.epoch;
1292
1293
if (scb_s->ecd & ECD_MEF) {
1294
scb_s->epdx += vcpu->kvm->arch.epdx;
1295
if (scb_s->epoch < vcpu->kvm->arch.epoch)
1296
scb_s->epdx += 1;
1297
}
1298
1299
preempt_enable();
1300
}
1301
1302
/*
1303
* Unregister a shadow scb from a VCPU.
1304
*/
1305
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
1306
{
1307
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
1308
WRITE_ONCE(vcpu->arch.vsie_block, NULL);
1309
}
1310
1311
/*
1312
* Run the vsie on a shadowed scb, managing the gmap shadow, handling
1313
* prefix pages and faults.
1314
*
1315
* Returns: - 0 if no errors occurred
1316
* - > 0 if control has to be given to guest 2
1317
* - -ENOMEM if out of memory
1318
*/
1319
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
1320
{
1321
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
1322
int rc = 0;
1323
1324
while (1) {
1325
rc = acquire_gmap_shadow(vcpu, vsie_page);
1326
if (!rc)
1327
rc = map_prefix(vcpu, vsie_page);
1328
if (!rc) {
1329
update_intervention_requests(vsie_page);
1330
rc = do_vsie_run(vcpu, vsie_page);
1331
}
1332
atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);
1333
1334
if (rc == -EAGAIN)
1335
rc = 0;
1336
1337
/*
1338
* Exit the loop if the guest needs to process the intercept
1339
*/
1340
if (rc || scb_s->icptcode)
1341
break;
1342
1343
/*
1344
* Exit the loop if the host needs to process an intercept,
1345
* but rewind the PSW to re-enter SIE once that's completed
1346
* instead of passing a "no action" intercept to the guest.
1347
*/
1348
if (signal_pending(current) ||
1349
kvm_s390_vcpu_has_irq(vcpu, 0) ||
1350
kvm_s390_vcpu_sie_inhibited(vcpu)) {
1351
kvm_s390_rewind_psw(vcpu, 4);
1352
break;
1353
}
1354
cond_resched();
1355
}
1356
1357
if (rc == -EFAULT) {
1358
/*
1359
* Addressing exceptions are always presented as intercepts.
1360
* As addressing exceptions are suppressing and our guest 3 PSW
1361
* points at the responsible instruction, we have to
1362
* forward the PSW and set the ilc. If we can't read the guest 3
1363
* instruction, we can use an arbitrary ilc. Let's always use
1364
* ilen = 4 for now, so we can avoid reading in guest 3 virtual
1365
* memory. (we could also fake the shadow so the hardware
1366
* handles it).
1367
*/
1368
scb_s->icptcode = ICPT_PROGI;
1369
scb_s->iprcc = PGM_ADDRESSING;
1370
scb_s->pgmilc = 4;
1371
scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
1372
rc = 1;
1373
}
1374
return rc;
1375
}
1376
1377
/* Try getting a given vsie page, returning "true" on success. */
1378
static inline bool try_get_vsie_page(struct vsie_page *vsie_page)
1379
{
1380
if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags))
1381
return false;
1382
return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
1383
}
1384
1385
/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */
1386
static void put_vsie_page(struct vsie_page *vsie_page)
1387
{
1388
clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
1389
}
1390
1391
/*
1392
* Get or create a vsie page for a scb address.
1393
*
1394
* Returns: - address of a vsie page (cached or new one)
1395
* - NULL if the same scb address is already used by another VCPU
1396
* - ERR_PTR(-ENOMEM) if out of memory
1397
*/
1398
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
1399
{
1400
struct vsie_page *vsie_page;
1401
int nr_vcpus;
1402
1403
rcu_read_lock();
1404
vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
1405
rcu_read_unlock();
1406
if (vsie_page) {
1407
if (try_get_vsie_page(vsie_page)) {
1408
if (vsie_page->scb_gpa == addr)
1409
return vsie_page;
1410
/*
1411
* We raced with someone reusing + putting this vsie
1412
* page before we grabbed it.
1413
*/
1414
put_vsie_page(vsie_page);
1415
}
1416
}
1417
1418
/*
1419
* We want at least #online_vcpus shadows, so every VCPU can execute
1420
* the VSIE in parallel.
1421
*/
1422
nr_vcpus = atomic_read(&kvm->online_vcpus);
1423
1424
mutex_lock(&kvm->arch.vsie.mutex);
1425
if (kvm->arch.vsie.page_count < nr_vcpus) {
1426
vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
1427
if (!vsie_page) {
1428
mutex_unlock(&kvm->arch.vsie.mutex);
1429
return ERR_PTR(-ENOMEM);
1430
}
1431
__set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags);
1432
kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page;
1433
kvm->arch.vsie.page_count++;
1434
} else {
1435
/* reuse an existing entry that belongs to nobody */
1436
while (true) {
1437
vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
1438
if (try_get_vsie_page(vsie_page))
1439
break;
1440
kvm->arch.vsie.next++;
1441
kvm->arch.vsie.next %= nr_vcpus;
1442
}
1443
if (vsie_page->scb_gpa != ULONG_MAX)
1444
radix_tree_delete(&kvm->arch.vsie.addr_to_page,
1445
vsie_page->scb_gpa >> 9);
1446
}
1447
/* Mark it as invalid until it resides in the tree. */
1448
vsie_page->scb_gpa = ULONG_MAX;
1449
1450
/* Double use of the same address or allocation failure. */
1451
if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9,
1452
vsie_page)) {
1453
put_vsie_page(vsie_page);
1454
mutex_unlock(&kvm->arch.vsie.mutex);
1455
return NULL;
1456
}
1457
vsie_page->scb_gpa = addr;
1458
mutex_unlock(&kvm->arch.vsie.mutex);
1459
1460
memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
1461
release_gmap_shadow(vsie_page);
1462
vsie_page->fault_addr = 0;
1463
vsie_page->scb_s.ihcpu = 0xffffU;
1464
return vsie_page;
1465
}
1466
1467
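/*
 * Handle the intercept of the SIE instruction executed by guest 2: pin and
 * shadow the guest-2 scb, pin the satellite blocks, run the vSIE loop and
 * tear everything down again. Returns 0 or a negative error code for the
 * instruction handler.
 */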
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
1468
{
1469
struct vsie_page *vsie_page;
1470
unsigned long scb_addr;
1471
int rc;
1472
1473
vcpu->stat.instruction_sie++;
1474
if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
1475
return -EOPNOTSUPP;
1476
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
1477
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
1478
1479
BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
1480
scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);
1481
1482
/* 512 byte alignment */
1483
if (unlikely(scb_addr & 0x1ffUL))
1484
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
1485
1486
if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
1487
kvm_s390_vcpu_sie_inhibited(vcpu)) {
1488
kvm_s390_rewind_psw(vcpu, 4);
1489
return 0;
1490
}
1491
1492
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
1493
if (IS_ERR(vsie_page))
1494
return PTR_ERR(vsie_page);
1495
else if (!vsie_page)
1496
/* double use of sie control block - simply do nothing */
1497
return 0;
1498
1499
rc = pin_scb(vcpu, vsie_page, scb_addr);
1500
if (rc)
1501
goto out_put;
1502
rc = shadow_scb(vcpu, vsie_page);
1503
if (rc)
1504
goto out_unpin_scb;
1505
rc = pin_blocks(vcpu, vsie_page);
1506
if (rc)
1507
goto out_unshadow;
1508
register_shadow_scb(vcpu, vsie_page);
1509
rc = vsie_run(vcpu, vsie_page);
1510
unregister_shadow_scb(vcpu);
1511
unpin_blocks(vcpu, vsie_page);
1512
out_unshadow:
1513
unshadow_scb(vcpu, vsie_page);
1514
out_unpin_scb:
1515
unpin_scb(vcpu, vsie_page, scb_addr);
1516
out_put:
1517
put_vsie_page(vsie_page);
1518
1519
return rc < 0 ? rc : 0;
1520
}
1521
1522
/* Init the vsie data structures. To be called when a vm is initialized. */
1523
void kvm_s390_vsie_init(struct kvm *kvm)
1524
{
1525
mutex_init(&kvm->arch.vsie.mutex);
1526
INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
1527
}
1528
1529
/* Destroy the vsie data structures. To be called when a vm is destroyed. */
1530
void kvm_s390_vsie_destroy(struct kvm *kvm)
1531
{
1532
struct vsie_page *vsie_page;
1533
int i;
1534
1535
mutex_lock(&kvm->arch.vsie.mutex);
1536
for (i = 0; i < kvm->arch.vsie.page_count; i++) {
1537
vsie_page = kvm->arch.vsie.pages[i];
1538
kvm->arch.vsie.pages[i] = NULL;
1539
release_gmap_shadow(vsie_page);
1540
/* free the radix tree entry */
1541
if (vsie_page->scb_gpa != ULONG_MAX)
1542
radix_tree_delete(&kvm->arch.vsie.addr_to_page,
1543
vsie_page->scb_gpa >> 9);
1544
free_page((unsigned long)vsie_page);
1545
}
1546
kvm->arch.vsie.page_count = 0;
1547
mutex_unlock(&kvm->arch.vsie.mutex);
1548
}
1549
1550
void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
1551
{
1552
struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);
1553
1554
/*
1555
* Even if the VCPU lets go of the shadow sie block reference, it is
1556
* still valid in the cache. So we can safely kick it.
1557
*/
1558
if (scb) {
1559
atomic_or(PROG_BLOCK_SIE, &scb->prog20);
1560
if (scb->prog0c & PROG_IN_SIE)
1561
atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
1562
}
1563
}
1564
1565