GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <[email protected]>
 *               Christian Borntraeger <[email protected]>
 *               Christian Ehrhardt <[email protected]>
 *               Jason J. Herne <[email protected]>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
68
KVM_GENERIC_VM_STATS(),
69
STATS_DESC_COUNTER(VM, inject_io),
70
STATS_DESC_COUNTER(VM, inject_float_mchk),
71
STATS_DESC_COUNTER(VM, inject_pfault_done),
72
STATS_DESC_COUNTER(VM, inject_service_signal),
73
STATS_DESC_COUNTER(VM, inject_virtio),
74
STATS_DESC_COUNTER(VM, aen_forward),
75
STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
76
STATS_DESC_COUNTER(VM, gmap_shadow_create),
77
STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
78
STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
79
STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
80
STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
81
STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
82
};
83
84
const struct kvm_stats_header kvm_vm_stats_header = {
85
.name_size = KVM_STATS_NAME_SIZE,
86
.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
87
.id_offset = sizeof(struct kvm_stats_header),
88
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
89
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
90
sizeof(kvm_vm_stats_desc),
91
};
92
93
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
94
KVM_GENERIC_VCPU_STATS(),
95
STATS_DESC_COUNTER(VCPU, exit_userspace),
96
STATS_DESC_COUNTER(VCPU, exit_null),
97
STATS_DESC_COUNTER(VCPU, exit_external_request),
98
STATS_DESC_COUNTER(VCPU, exit_io_request),
99
STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
100
STATS_DESC_COUNTER(VCPU, exit_stop_request),
101
STATS_DESC_COUNTER(VCPU, exit_validity),
102
STATS_DESC_COUNTER(VCPU, exit_instruction),
103
STATS_DESC_COUNTER(VCPU, exit_pei),
104
STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
105
STATS_DESC_COUNTER(VCPU, instruction_lctl),
106
STATS_DESC_COUNTER(VCPU, instruction_lctlg),
107
STATS_DESC_COUNTER(VCPU, instruction_stctl),
108
STATS_DESC_COUNTER(VCPU, instruction_stctg),
109
STATS_DESC_COUNTER(VCPU, exit_program_interruption),
110
STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
111
STATS_DESC_COUNTER(VCPU, exit_operation_exception),
112
STATS_DESC_COUNTER(VCPU, deliver_ckc),
113
STATS_DESC_COUNTER(VCPU, deliver_cputm),
114
STATS_DESC_COUNTER(VCPU, deliver_external_call),
115
STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
116
STATS_DESC_COUNTER(VCPU, deliver_service_signal),
117
STATS_DESC_COUNTER(VCPU, deliver_virtio),
118
STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
119
STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
120
STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
121
STATS_DESC_COUNTER(VCPU, deliver_program),
122
STATS_DESC_COUNTER(VCPU, deliver_io),
123
STATS_DESC_COUNTER(VCPU, deliver_machine_check),
124
STATS_DESC_COUNTER(VCPU, exit_wait_state),
125
STATS_DESC_COUNTER(VCPU, inject_ckc),
126
STATS_DESC_COUNTER(VCPU, inject_cputm),
127
STATS_DESC_COUNTER(VCPU, inject_external_call),
128
STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
129
STATS_DESC_COUNTER(VCPU, inject_mchk),
130
STATS_DESC_COUNTER(VCPU, inject_pfault_init),
131
STATS_DESC_COUNTER(VCPU, inject_program),
132
STATS_DESC_COUNTER(VCPU, inject_restart),
133
STATS_DESC_COUNTER(VCPU, inject_set_prefix),
134
STATS_DESC_COUNTER(VCPU, inject_stop_signal),
135
STATS_DESC_COUNTER(VCPU, instruction_epsw),
136
STATS_DESC_COUNTER(VCPU, instruction_gs),
137
STATS_DESC_COUNTER(VCPU, instruction_io_other),
138
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
139
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
140
STATS_DESC_COUNTER(VCPU, instruction_lpswey),
141
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
142
STATS_DESC_COUNTER(VCPU, instruction_ptff),
143
STATS_DESC_COUNTER(VCPU, instruction_sck),
144
STATS_DESC_COUNTER(VCPU, instruction_sckpf),
145
STATS_DESC_COUNTER(VCPU, instruction_stidp),
146
STATS_DESC_COUNTER(VCPU, instruction_spx),
147
STATS_DESC_COUNTER(VCPU, instruction_stpx),
148
STATS_DESC_COUNTER(VCPU, instruction_stap),
149
STATS_DESC_COUNTER(VCPU, instruction_iske),
150
STATS_DESC_COUNTER(VCPU, instruction_ri),
151
STATS_DESC_COUNTER(VCPU, instruction_rrbe),
152
STATS_DESC_COUNTER(VCPU, instruction_sske),
153
STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
154
STATS_DESC_COUNTER(VCPU, instruction_stsi),
155
STATS_DESC_COUNTER(VCPU, instruction_stfl),
156
STATS_DESC_COUNTER(VCPU, instruction_tb),
157
STATS_DESC_COUNTER(VCPU, instruction_tpi),
158
STATS_DESC_COUNTER(VCPU, instruction_tprot),
159
STATS_DESC_COUNTER(VCPU, instruction_tsch),
160
STATS_DESC_COUNTER(VCPU, instruction_sie),
161
STATS_DESC_COUNTER(VCPU, instruction_essa),
162
STATS_DESC_COUNTER(VCPU, instruction_sthyi),
163
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
164
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
165
STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
166
STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
167
STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
168
STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
169
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
170
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
171
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
172
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
173
STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
174
STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
175
STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
176
STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
177
STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
178
STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
179
STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
180
STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
181
STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
182
STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
183
STATS_DESC_COUNTER(VCPU, diag_9c_forward),
184
STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
185
STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
186
STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
187
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
188
STATS_DESC_COUNTER(VCPU, pfault_sync)
189
};
190
191
const struct kvm_stats_header kvm_vcpu_stats_header = {
192
.name_size = KVM_STATS_NAME_SIZE,
193
.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
194
.id_offset = sizeof(struct kvm_stats_header),
195
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
196
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
197
sizeof(kvm_vcpu_stats_desc),
198
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * allow asynchronous deinit for protected guests; enable by default since
 * the feature is opt-in anyway
 */
static int async_destroy = 1;
module_param(async_destroy, int, 0444);
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                     sizeof(stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                " lgr 0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                " plo 0,0,0,0(0)\n"
                CC_IPM(cc)
                : CC_OUT(cc, cc)
                : [function] "d" (function)
                : CC_CLOBBER_LIST("0"));
        return CC_TRANSFORM(cc) == 0;
}

static __always_inline void pfcr_query(u8 (*query)[16])
{
        asm volatile(
                " lghi 0,0\n"
                " .insn rsy,0xeb0000000016,0,0,%[query]\n"
                : [query] "=QS" (*query)
                :
                : "cc", "0");
}

static __always_inline void __sortl_query(u8 (*query)[32])
{
        asm volatile(
                " lghi 0,0\n"
                " la 1,%[query]\n"
                /* Parameter registers are ignored */
                " .insn rre,0xb9380000,2,4\n"
                : [query] "=R" (*query)
                :
                : "cc", "0", "1");
}

static __always_inline void __dfltcc_query(u8 (*query)[32])
{
        asm volatile(
                " lghi 0,0\n"
                " la 1,%[query]\n"
                /* Parameter registers are ignored */
                " .insn rrf,0xb9390000,2,4,6,0\n"
                : [query] "=R" (*query)
                :
                : "cc", "0", "1");
}

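/*
 * Probe the host for available CPU subfunctions (PLO, PTFF, CPACF, SORTL,
 * DFLTCC, PFCR) and SIE features, recording them in
 * kvm_s390_available_subfunc and kvm_s390_available_cpu_feat so they can be
 * offered to guests via the cpu model. Features that depend on the SIE
 * virtualization extensions are only advertised when the host provides them
 * and the 'nested' module parameter is set.
 */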
static void __init kvm_s390_cpu_feat_init(void)
390
{
391
int i;
392
393
for (i = 0; i < 256; ++i) {
394
if (plo_test_bit(i))
395
kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
396
}
397
398
if (test_facility(28)) /* TOD-clock steering */
399
ptff(kvm_s390_available_subfunc.ptff,
400
sizeof(kvm_s390_available_subfunc.ptff),
401
PTFF_QAF);
402
403
if (test_facility(17)) { /* MSA */
404
__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
405
kvm_s390_available_subfunc.kmac);
406
__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
407
kvm_s390_available_subfunc.kmc);
408
__cpacf_query(CPACF_KM, (cpacf_mask_t *)
409
kvm_s390_available_subfunc.km);
410
__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
411
kvm_s390_available_subfunc.kimd);
412
__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
413
kvm_s390_available_subfunc.klmd);
414
}
415
if (test_facility(76)) /* MSA3 */
416
__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
417
kvm_s390_available_subfunc.pckmo);
418
if (test_facility(77)) { /* MSA4 */
419
__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
420
kvm_s390_available_subfunc.kmctr);
421
__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
422
kvm_s390_available_subfunc.kmf);
423
__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
424
kvm_s390_available_subfunc.kmo);
425
__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
426
kvm_s390_available_subfunc.pcc);
427
}
428
if (test_facility(57)) /* MSA5 */
429
__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
430
kvm_s390_available_subfunc.ppno);
431
432
if (test_facility(146)) /* MSA8 */
433
__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
434
kvm_s390_available_subfunc.kma);
435
436
if (test_facility(155)) /* MSA9 */
437
__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
438
kvm_s390_available_subfunc.kdsa);
439
440
if (test_facility(150)) /* SORTL */
441
__sortl_query(&kvm_s390_available_subfunc.sortl);
442
443
if (test_facility(151)) /* DFLTCC */
444
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
445
446
if (test_facility(201)) /* PFCR */
447
pfcr_query(&kvm_s390_available_subfunc.pfcr);
448
449
if (machine_has_esop())
450
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
451
/*
452
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
453
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
454
*/
455
if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
456
!test_facility(3) || !nested)
457
return;
458
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
459
if (sclp.has_64bscao)
460
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
461
if (sclp.has_siif)
462
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
463
if (sclp.has_gpere)
464
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
465
if (sclp.has_gsls)
466
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
467
if (sclp.has_ib)
468
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
469
if (sclp.has_cei)
470
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
471
if (sclp.has_ibs)
472
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
473
if (sclp.has_kss)
474
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
475
/*
476
* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
477
* all skey handling functions read/set the skey from the PGSTE
478
* instead of the real storage key.
479
*
480
* KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
481
* pages being detected as preserved although they are resident.
482
*
483
* KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
484
* have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
485
*
486
* For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
487
* KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
488
* correctly shadowed. We can do that for the PGSTE but not for PTE.I.
489
*
490
* KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
491
* cannot easily shadow the SCA because of the ipte lock.
492
*/
493
}
494
495
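/*
 * Module init: set up the "kvm-trace" and "kvm-uv" s390 debug feature logs,
 * probe host CPU features, register the FLIC device ops, the zPCI AIFT (if
 * VFIO zPCI interpretation is built in), the GIB, the gmap PTE notifiers and
 * the TOD epoch delta notifier. Everything is rolled back on error.
 */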
static int __init __kvm_s390_init(void)
496
{
497
int rc = -ENOMEM;
498
499
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
500
if (!kvm_s390_dbf)
501
return -ENOMEM;
502
503
kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
504
if (!kvm_s390_dbf_uv)
505
goto err_kvm_uv;
506
507
if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
508
debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
509
goto err_debug_view;
510
511
kvm_s390_cpu_feat_init();
512
513
/* Register floating interrupt controller interface. */
514
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
515
if (rc) {
516
pr_err("A FLIC registration call failed with rc=%d\n", rc);
517
goto err_flic;
518
}
519
520
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
521
rc = kvm_s390_pci_init();
522
if (rc) {
523
pr_err("Unable to allocate AIFT for PCI\n");
524
goto err_pci;
525
}
526
}
527
528
rc = kvm_s390_gib_init(GAL_ISC);
529
if (rc)
530
goto err_gib;
531
532
gmap_notifier.notifier_call = kvm_gmap_notifier;
533
gmap_register_pte_notifier(&gmap_notifier);
534
vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
535
gmap_register_pte_notifier(&vsie_gmap_notifier);
536
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
537
&kvm_clock_notifier);
538
539
return 0;
540
541
err_gib:
542
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
543
kvm_s390_pci_exit();
544
err_pci:
545
err_flic:
546
err_debug_view:
547
debug_unregister(kvm_s390_dbf_uv);
548
err_kvm_uv:
549
debug_unregister(kvm_s390_dbf);
550
return rc;
551
}
552
553
static void __kvm_s390_exit(void)
554
{
555
gmap_unregister_pte_notifier(&gmap_notifier);
556
gmap_unregister_pte_notifier(&vsie_gmap_notifier);
557
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
558
&kvm_clock_notifier);
559
560
kvm_s390_gib_destroy();
561
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
562
kvm_s390_pci_exit();
563
debug_unregister(kvm_s390_dbf);
564
debug_unregister(kvm_s390_dbf_uv);
565
}
566
567
/* Section: device related */
568
long kvm_arch_dev_ioctl(struct file *filp,
569
unsigned int ioctl, unsigned long arg)
570
{
571
if (ioctl == KVM_S390_ENABLE_SIE)
572
return s390_enable_sie();
573
return -EINVAL;
574
}
575
576
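/*
 * Report which capabilities this host/VM supports. Userspace queries these
 * with the KVM_CHECK_EXTENSION ioctl; most entries return 0/1, while some
 * (e.g. KVM_CAP_S390_MEM_OP, KVM_CAP_MAX_VCPUS) return a limit instead.
 */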
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
577
{
578
int r;
579
580
switch (ext) {
581
case KVM_CAP_S390_PSW:
582
case KVM_CAP_S390_GMAP:
583
case KVM_CAP_SYNC_MMU:
584
#ifdef CONFIG_KVM_S390_UCONTROL
585
case KVM_CAP_S390_UCONTROL:
586
#endif
587
case KVM_CAP_ASYNC_PF:
588
case KVM_CAP_SYNC_REGS:
589
case KVM_CAP_ONE_REG:
590
case KVM_CAP_ENABLE_CAP:
591
case KVM_CAP_S390_CSS_SUPPORT:
592
case KVM_CAP_IOEVENTFD:
593
case KVM_CAP_S390_IRQCHIP:
594
case KVM_CAP_VM_ATTRIBUTES:
595
case KVM_CAP_MP_STATE:
596
case KVM_CAP_IMMEDIATE_EXIT:
597
case KVM_CAP_S390_INJECT_IRQ:
598
case KVM_CAP_S390_USER_SIGP:
599
case KVM_CAP_S390_USER_STSI:
600
case KVM_CAP_S390_SKEYS:
601
case KVM_CAP_S390_IRQ_STATE:
602
case KVM_CAP_S390_USER_INSTR0:
603
case KVM_CAP_S390_CMMA_MIGRATION:
604
case KVM_CAP_S390_AIS:
605
case KVM_CAP_S390_AIS_MIGRATION:
606
case KVM_CAP_S390_VCPU_RESETS:
607
case KVM_CAP_SET_GUEST_DEBUG:
608
case KVM_CAP_S390_DIAG318:
609
case KVM_CAP_IRQFD_RESAMPLE:
610
r = 1;
611
break;
612
case KVM_CAP_SET_GUEST_DEBUG2:
613
r = KVM_GUESTDBG_VALID_MASK;
614
break;
615
case KVM_CAP_S390_HPAGE_1M:
616
r = 0;
617
if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
618
r = 1;
619
break;
620
case KVM_CAP_S390_MEM_OP:
621
r = MEM_OP_MAX_SIZE;
622
break;
623
case KVM_CAP_S390_MEM_OP_EXTENSION:
624
/*
625
* Flag bits indicating which extensions are supported.
626
* If r > 0, the base extension must also be supported/indicated,
627
* in order to maintain backwards compatibility.
628
*/
629
r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
630
KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
631
break;
632
case KVM_CAP_NR_VCPUS:
633
case KVM_CAP_MAX_VCPUS:
634
case KVM_CAP_MAX_VCPU_ID:
635
r = KVM_S390_BSCA_CPU_SLOTS;
636
if (!kvm_s390_use_sca_entries())
637
r = KVM_MAX_VCPUS;
638
else if (sclp.has_esca && sclp.has_64bscao)
639
r = KVM_S390_ESCA_CPU_SLOTS;
640
if (ext == KVM_CAP_NR_VCPUS)
641
r = min_t(unsigned int, num_online_cpus(), r);
642
break;
643
case KVM_CAP_S390_COW:
644
r = machine_has_esop();
645
break;
646
case KVM_CAP_S390_VECTOR_REGISTERS:
647
r = test_facility(129);
648
break;
649
case KVM_CAP_S390_RI:
650
r = test_facility(64);
651
break;
652
case KVM_CAP_S390_GS:
653
r = test_facility(133);
654
break;
655
case KVM_CAP_S390_BPB:
656
r = test_facility(82);
657
break;
658
case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
659
r = async_destroy && is_prot_virt_host();
660
break;
661
case KVM_CAP_S390_PROTECTED:
662
r = is_prot_virt_host();
663
break;
664
case KVM_CAP_S390_PROTECTED_DUMP: {
665
u64 pv_cmds_dump[] = {
666
BIT_UVC_CMD_DUMP_INIT,
667
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
668
BIT_UVC_CMD_DUMP_CPU,
669
BIT_UVC_CMD_DUMP_COMPLETE,
670
};
671
int i;
672
673
r = is_prot_virt_host();
674
675
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
676
if (!test_bit_inv(pv_cmds_dump[i],
677
(unsigned long *)&uv_info.inst_calls_list)) {
678
r = 0;
679
break;
680
}
681
}
682
break;
683
}
684
case KVM_CAP_S390_ZPCI_OP:
685
r = kvm_s390_pci_interp_allowed();
686
break;
687
case KVM_CAP_S390_CPU_TOPOLOGY:
688
r = test_facility(11);
689
break;
690
default:
691
r = 0;
692
}
693
return r;
694
}
695
696
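/*
 * Transfer dirty information from the gmap to the memslot's dirty bitmap.
 * The slot is scanned in segments of _PAGE_ENTRIES pages; dirty bits
 * collected by gmap_sync_dirty_log_pmd() are forwarded via mark_page_dirty().
 */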
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
697
{
698
int i;
699
gfn_t cur_gfn, last_gfn;
700
unsigned long gaddr, vmaddr;
701
struct gmap *gmap = kvm->arch.gmap;
702
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
703
704
/* Loop over all guest segments */
705
cur_gfn = memslot->base_gfn;
706
last_gfn = memslot->base_gfn + memslot->npages;
707
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
708
gaddr = gfn_to_gpa(cur_gfn);
709
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
710
if (kvm_is_error_hva(vmaddr))
711
continue;
712
713
bitmap_zero(bitmap, _PAGE_ENTRIES);
714
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
715
for (i = 0; i < _PAGE_ENTRIES; i++) {
716
if (test_bit(i, bitmap))
717
mark_page_dirty(kvm, cur_gfn + i);
718
}
719
720
if (fatal_signal_pending(current))
721
return;
722
cond_resched();
723
}
724
}
725
726
/* Section: vm related */
727
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
728
729
/*
730
* Get (and clear) the dirty memory log for a memory slot.
731
*/
732
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
733
struct kvm_dirty_log *log)
734
{
735
int r;
736
unsigned long n;
737
struct kvm_memory_slot *memslot;
738
int is_dirty;
739
740
if (kvm_is_ucontrol(kvm))
741
return -EINVAL;
742
743
mutex_lock(&kvm->slots_lock);
744
745
r = -EINVAL;
746
if (log->slot >= KVM_USER_MEM_SLOTS)
747
goto out;
748
749
r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
750
if (r)
751
goto out;
752
753
/* Clear the dirty log */
754
if (is_dirty) {
755
n = kvm_dirty_bitmap_bytes(memslot);
756
memset(memslot->dirty_bitmap, 0, n);
757
}
758
r = 0;
759
out:
760
mutex_unlock(&kvm->slots_lock);
761
return r;
762
}
763
764
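/*
 * Request interception of operation exceptions on every vcpu, so that
 * instruction 0x0000 is forwarded to userspace (see KVM_CAP_S390_USER_INSTR0).
 */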
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
765
{
766
unsigned long i;
767
struct kvm_vcpu *vcpu;
768
769
kvm_for_each_vcpu(i, vcpu, kvm) {
770
kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
771
}
772
}
773
774
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
775
{
776
int r;
777
778
if (cap->flags)
779
return -EINVAL;
780
781
switch (cap->cap) {
782
case KVM_CAP_S390_IRQCHIP:
783
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
784
kvm->arch.use_irqchip = 1;
785
r = 0;
786
break;
787
case KVM_CAP_S390_USER_SIGP:
788
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
789
kvm->arch.user_sigp = 1;
790
r = 0;
791
break;
792
case KVM_CAP_S390_VECTOR_REGISTERS:
793
mutex_lock(&kvm->lock);
794
if (kvm->created_vcpus) {
795
r = -EBUSY;
796
} else if (cpu_has_vx()) {
797
set_kvm_facility(kvm->arch.model.fac_mask, 129);
798
set_kvm_facility(kvm->arch.model.fac_list, 129);
799
if (test_facility(134)) {
800
set_kvm_facility(kvm->arch.model.fac_mask, 134);
801
set_kvm_facility(kvm->arch.model.fac_list, 134);
802
}
803
if (test_facility(135)) {
804
set_kvm_facility(kvm->arch.model.fac_mask, 135);
805
set_kvm_facility(kvm->arch.model.fac_list, 135);
806
}
807
if (test_facility(148)) {
808
set_kvm_facility(kvm->arch.model.fac_mask, 148);
809
set_kvm_facility(kvm->arch.model.fac_list, 148);
810
}
811
if (test_facility(152)) {
812
set_kvm_facility(kvm->arch.model.fac_mask, 152);
813
set_kvm_facility(kvm->arch.model.fac_list, 152);
814
}
815
if (test_facility(192)) {
816
set_kvm_facility(kvm->arch.model.fac_mask, 192);
817
set_kvm_facility(kvm->arch.model.fac_list, 192);
818
}
819
if (test_facility(198)) {
820
set_kvm_facility(kvm->arch.model.fac_mask, 198);
821
set_kvm_facility(kvm->arch.model.fac_list, 198);
822
}
823
if (test_facility(199)) {
824
set_kvm_facility(kvm->arch.model.fac_mask, 199);
825
set_kvm_facility(kvm->arch.model.fac_list, 199);
826
}
827
r = 0;
828
} else
829
r = -EINVAL;
830
mutex_unlock(&kvm->lock);
831
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
832
r ? "(not available)" : "(success)");
833
break;
834
case KVM_CAP_S390_RI:
835
r = -EINVAL;
836
mutex_lock(&kvm->lock);
837
if (kvm->created_vcpus) {
838
r = -EBUSY;
839
} else if (test_facility(64)) {
840
set_kvm_facility(kvm->arch.model.fac_mask, 64);
841
set_kvm_facility(kvm->arch.model.fac_list, 64);
842
r = 0;
843
}
844
mutex_unlock(&kvm->lock);
845
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
846
r ? "(not available)" : "(success)");
847
break;
848
case KVM_CAP_S390_AIS:
849
mutex_lock(&kvm->lock);
850
if (kvm->created_vcpus) {
851
r = -EBUSY;
852
} else {
853
set_kvm_facility(kvm->arch.model.fac_mask, 72);
854
set_kvm_facility(kvm->arch.model.fac_list, 72);
855
r = 0;
856
}
857
mutex_unlock(&kvm->lock);
858
VM_EVENT(kvm, 3, "ENABLE: AIS %s",
859
r ? "(not available)" : "(success)");
860
break;
861
case KVM_CAP_S390_GS:
862
r = -EINVAL;
863
mutex_lock(&kvm->lock);
864
if (kvm->created_vcpus) {
865
r = -EBUSY;
866
} else if (test_facility(133)) {
867
set_kvm_facility(kvm->arch.model.fac_mask, 133);
868
set_kvm_facility(kvm->arch.model.fac_list, 133);
869
r = 0;
870
}
871
mutex_unlock(&kvm->lock);
872
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
873
r ? "(not available)" : "(success)");
874
break;
875
case KVM_CAP_S390_HPAGE_1M:
876
mutex_lock(&kvm->lock);
877
if (kvm->created_vcpus)
878
r = -EBUSY;
879
else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
880
r = -EINVAL;
881
else {
882
r = 0;
883
mmap_write_lock(kvm->mm);
884
kvm->mm->context.allow_gmap_hpage_1m = 1;
885
mmap_write_unlock(kvm->mm);
886
/*
887
* We might have to create fake 4k page
888
* tables. To avoid that the hardware works on
889
* stale PGSTEs, we emulate these instructions.
890
*/
891
kvm->arch.use_skf = 0;
892
kvm->arch.use_pfmfi = 0;
893
}
894
mutex_unlock(&kvm->lock);
895
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
896
r ? "(not available)" : "(success)");
897
break;
898
case KVM_CAP_S390_USER_STSI:
899
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
900
kvm->arch.user_stsi = 1;
901
r = 0;
902
break;
903
case KVM_CAP_S390_USER_INSTR0:
904
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
905
kvm->arch.user_instr0 = 1;
906
icpt_operexc_on_all_vcpus(kvm);
907
r = 0;
908
break;
909
case KVM_CAP_S390_CPU_TOPOLOGY:
910
r = -EINVAL;
911
mutex_lock(&kvm->lock);
912
if (kvm->created_vcpus) {
913
r = -EBUSY;
914
} else if (test_facility(11)) {
915
set_kvm_facility(kvm->arch.model.fac_mask, 11);
916
set_kvm_facility(kvm->arch.model.fac_list, 11);
917
r = 0;
918
}
919
mutex_unlock(&kvm->lock);
920
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
921
r ? "(not available)" : "(success)");
922
break;
923
default:
924
r = -EINVAL;
925
break;
926
}
927
return r;
928
}
929
930
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
931
{
932
int ret;
933
934
switch (attr->attr) {
935
case KVM_S390_VM_MEM_LIMIT_SIZE:
936
ret = 0;
937
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
938
kvm->arch.mem_limit);
939
if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
940
ret = -EFAULT;
941
break;
942
default:
943
ret = -ENXIO;
944
break;
945
}
946
return ret;
947
}
948
949
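/*
 * KVM_S390_VM_MEM_CTRL attribute setter: enable CMMA, reset CMMA state, or
 * change the guest memory limit (which replaces the VM's gmap). Enabling CMMA
 * and changing the memory limit are only possible before the first vcpu is
 * created.
 */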
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
950
{
951
int ret;
952
unsigned int idx;
953
switch (attr->attr) {
954
case KVM_S390_VM_MEM_ENABLE_CMMA:
955
ret = -ENXIO;
956
if (!sclp.has_cmma)
957
break;
958
959
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
960
mutex_lock(&kvm->lock);
961
if (kvm->created_vcpus)
962
ret = -EBUSY;
963
else if (kvm->mm->context.allow_gmap_hpage_1m)
964
ret = -EINVAL;
965
else {
966
kvm->arch.use_cmma = 1;
967
/* Not compatible with cmma. */
968
kvm->arch.use_pfmfi = 0;
969
ret = 0;
970
}
971
mutex_unlock(&kvm->lock);
972
break;
973
case KVM_S390_VM_MEM_CLR_CMMA:
974
ret = -ENXIO;
975
if (!sclp.has_cmma)
976
break;
977
ret = -EINVAL;
978
if (!kvm->arch.use_cmma)
979
break;
980
981
VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
982
mutex_lock(&kvm->lock);
983
idx = srcu_read_lock(&kvm->srcu);
984
s390_reset_cmma(kvm->arch.gmap->mm);
985
srcu_read_unlock(&kvm->srcu, idx);
986
mutex_unlock(&kvm->lock);
987
ret = 0;
988
break;
989
case KVM_S390_VM_MEM_LIMIT_SIZE: {
990
unsigned long new_limit;
991
992
if (kvm_is_ucontrol(kvm))
993
return -EINVAL;
994
995
if (get_user(new_limit, (u64 __user *)attr->addr))
996
return -EFAULT;
997
998
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
999
new_limit > kvm->arch.mem_limit)
1000
return -E2BIG;
1001
1002
if (!new_limit)
1003
return -EINVAL;
1004
1005
/* gmap_create takes last usable address */
1006
if (new_limit != KVM_S390_NO_MEM_LIMIT)
1007
new_limit -= 1;
1008
1009
ret = -EBUSY;
1010
mutex_lock(&kvm->lock);
1011
if (!kvm->created_vcpus) {
1012
/* gmap_create will round the limit up */
1013
struct gmap *new = gmap_create(current->mm, new_limit);
1014
1015
if (!new) {
1016
ret = -ENOMEM;
1017
} else {
1018
gmap_remove(kvm->arch.gmap);
1019
new->private = kvm;
1020
kvm->arch.gmap = new;
1021
ret = 0;
1022
}
1023
}
1024
mutex_unlock(&kvm->lock);
1025
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
1026
VM_EVENT(kvm, 3, "New guest asce: 0x%p",
1027
(void *) kvm->arch.gmap->asce);
1028
break;
1029
}
1030
default:
1031
ret = -ENXIO;
1032
break;
1033
}
1034
return ret;
1035
}
1036
1037
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1038
1039
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1040
{
1041
struct kvm_vcpu *vcpu;
1042
unsigned long i;
1043
1044
kvm_s390_vcpu_block_all(kvm);
1045
1046
kvm_for_each_vcpu(i, vcpu, kvm) {
1047
kvm_s390_vcpu_crypto_setup(vcpu);
1048
/* recreate the shadow crycb by leaving the VSIE handler */
1049
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1050
}
1051
1052
kvm_s390_vcpu_unblock_all(kvm);
1053
}
1054
1055
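/*
 * KVM_S390_VM_CRYPTO attribute setter: toggle AES/DEA key wrapping
 * (generating fresh random wrapping key masks when enabling) and AP
 * instruction interpretation (APIE), then force every vcpu to rebuild its
 * crypto control block.
 */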
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1056
{
1057
mutex_lock(&kvm->lock);
1058
switch (attr->attr) {
1059
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1060
if (!test_kvm_facility(kvm, 76)) {
1061
mutex_unlock(&kvm->lock);
1062
return -EINVAL;
1063
}
1064
get_random_bytes(
1065
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1066
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1067
kvm->arch.crypto.aes_kw = 1;
1068
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1069
break;
1070
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1071
if (!test_kvm_facility(kvm, 76)) {
1072
mutex_unlock(&kvm->lock);
1073
return -EINVAL;
1074
}
1075
get_random_bytes(
1076
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1077
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1078
kvm->arch.crypto.dea_kw = 1;
1079
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1080
break;
1081
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1082
if (!test_kvm_facility(kvm, 76)) {
1083
mutex_unlock(&kvm->lock);
1084
return -EINVAL;
1085
}
1086
kvm->arch.crypto.aes_kw = 0;
1087
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1088
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1089
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1090
break;
1091
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1092
if (!test_kvm_facility(kvm, 76)) {
1093
mutex_unlock(&kvm->lock);
1094
return -EINVAL;
1095
}
1096
kvm->arch.crypto.dea_kw = 0;
1097
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1098
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1099
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1100
break;
1101
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1102
if (!ap_instructions_available()) {
1103
mutex_unlock(&kvm->lock);
1104
return -EOPNOTSUPP;
1105
}
1106
kvm->arch.crypto.apie = 1;
1107
break;
1108
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1109
if (!ap_instructions_available()) {
1110
mutex_unlock(&kvm->lock);
1111
return -EOPNOTSUPP;
1112
}
1113
kvm->arch.crypto.apie = 0;
1114
break;
1115
default:
1116
mutex_unlock(&kvm->lock);
1117
return -ENXIO;
1118
}
1119
1120
kvm_s390_vcpu_crypto_reset_all(kvm);
1121
mutex_unlock(&kvm->lock);
1122
return 0;
1123
}
1124
1125
static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1126
{
1127
/* Only set the ECB bits after guest requests zPCI interpretation */
1128
if (!vcpu->kvm->arch.use_zpci_interp)
1129
return;
1130
1131
vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1132
vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1133
}
1134
1135
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1136
{
1137
struct kvm_vcpu *vcpu;
1138
unsigned long i;
1139
1140
lockdep_assert_held(&kvm->lock);
1141
1142
if (!kvm_s390_pci_interp_allowed())
1143
return;
1144
1145
/*
1146
* If host is configured for PCI and the necessary facilities are
1147
* available, turn on interpretation for the life of this guest
1148
*/
1149
kvm->arch.use_zpci_interp = 1;
1150
1151
kvm_s390_vcpu_block_all(kvm);
1152
1153
kvm_for_each_vcpu(i, vcpu, kvm) {
1154
kvm_s390_vcpu_pci_setup(vcpu);
1155
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1156
}
1157
1158
kvm_s390_vcpu_unblock_all(kvm);
1159
}
1160
1161
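/* Make the same synchronous request on every vcpu of the VM. */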
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1162
{
1163
unsigned long cx;
1164
struct kvm_vcpu *vcpu;
1165
1166
kvm_for_each_vcpu(cx, vcpu, kvm)
1167
kvm_s390_sync_request(req, vcpu);
1168
}
1169
1170
/*
1171
* Must be called with kvm->srcu held to avoid races on memslots, and with
1172
* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1173
*/
1174
static int kvm_s390_vm_start_migration(struct kvm *kvm)
1175
{
1176
struct kvm_memory_slot *ms;
1177
struct kvm_memslots *slots;
1178
unsigned long ram_pages = 0;
1179
int bkt;
1180
1181
/* migration mode already enabled */
1182
if (kvm->arch.migration_mode)
1183
return 0;
1184
slots = kvm_memslots(kvm);
1185
if (!slots || kvm_memslots_empty(slots))
1186
return -EINVAL;
1187
1188
if (!kvm->arch.use_cmma) {
1189
kvm->arch.migration_mode = 1;
1190
return 0;
1191
}
1192
/* mark all the pages in active slots as dirty */
1193
kvm_for_each_memslot(ms, bkt, slots) {
1194
if (!ms->dirty_bitmap)
1195
return -EINVAL;
1196
/*
1197
* The second half of the bitmap is only used on x86,
1198
* and would be wasted otherwise, so we put it to good
1199
* use here to keep track of the state of the storage
1200
* attributes.
1201
*/
1202
memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1203
ram_pages += ms->npages;
1204
}
1205
atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1206
kvm->arch.migration_mode = 1;
1207
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1208
return 0;
1209
}
1210
1211
/*
1212
* Must be called with kvm->slots_lock to avoid races with ourselves and
1213
* kvm_s390_vm_start_migration.
1214
*/
1215
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1216
{
1217
/* migration mode already disabled */
1218
if (!kvm->arch.migration_mode)
1219
return 0;
1220
kvm->arch.migration_mode = 0;
1221
if (kvm->arch.use_cmma)
1222
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1223
return 0;
1224
}
1225
1226
static int kvm_s390_vm_set_migration(struct kvm *kvm,
1227
struct kvm_device_attr *attr)
1228
{
1229
int res = -ENXIO;
1230
1231
mutex_lock(&kvm->slots_lock);
1232
switch (attr->attr) {
1233
case KVM_S390_VM_MIGRATION_START:
1234
res = kvm_s390_vm_start_migration(kvm);
1235
break;
1236
case KVM_S390_VM_MIGRATION_STOP:
1237
res = kvm_s390_vm_stop_migration(kvm);
1238
break;
1239
default:
1240
break;
1241
}
1242
mutex_unlock(&kvm->slots_lock);
1243
1244
return res;
1245
}
1246
1247
static int kvm_s390_vm_get_migration(struct kvm *kvm,
1248
struct kvm_device_attr *attr)
1249
{
1250
u64 mig = kvm->arch.migration_mode;
1251
1252
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1253
return -ENXIO;
1254
1255
if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1256
return -EFAULT;
1257
return 0;
1258
}
1259
1260
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1261
1262
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1263
{
1264
struct kvm_s390_vm_tod_clock gtod;
1265
1266
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1267
return -EFAULT;
1268
1269
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1270
return -EINVAL;
1271
__kvm_s390_set_tod_clock(kvm, &gtod);
1272
1273
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1274
gtod.epoch_idx, gtod.tod);
1275
1276
return 0;
1277
}
1278
1279
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1280
{
1281
u8 gtod_high;
1282
1283
if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1284
sizeof(gtod_high)))
1285
return -EFAULT;
1286
1287
if (gtod_high != 0)
1288
return -EINVAL;
1289
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1290
1291
return 0;
1292
}
1293
1294
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1295
{
1296
struct kvm_s390_vm_tod_clock gtod = { 0 };
1297
1298
if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1299
sizeof(gtod.tod)))
1300
return -EFAULT;
1301
1302
__kvm_s390_set_tod_clock(kvm, &gtod);
1303
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1304
return 0;
1305
}
1306
1307
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1308
{
1309
int ret;
1310
1311
if (attr->flags)
1312
return -EINVAL;
1313
1314
mutex_lock(&kvm->lock);
1315
/*
1316
* For protected guests, the TOD is managed by the ultravisor, so trying
1317
* to change it will never bring the expected results.
1318
*/
1319
if (kvm_s390_pv_is_protected(kvm)) {
1320
ret = -EOPNOTSUPP;
1321
goto out_unlock;
1322
}
1323
1324
switch (attr->attr) {
1325
case KVM_S390_VM_TOD_EXT:
1326
ret = kvm_s390_set_tod_ext(kvm, attr);
1327
break;
1328
case KVM_S390_VM_TOD_HIGH:
1329
ret = kvm_s390_set_tod_high(kvm, attr);
1330
break;
1331
case KVM_S390_VM_TOD_LOW:
1332
ret = kvm_s390_set_tod_low(kvm, attr);
1333
break;
1334
default:
1335
ret = -ENXIO;
1336
break;
1337
}
1338
1339
out_unlock:
1340
mutex_unlock(&kvm->lock);
1341
return ret;
1342
}
1343
1344
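/*
 * Compute the guest view of the TOD clock: host TOD plus the VM's epoch and,
 * if the multiple-epoch facility (139) is available, the epoch index with a
 * carry when the addition wrapped. Runs with preemption disabled so the
 * values stay consistent.
 */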
static void kvm_s390_get_tod_clock(struct kvm *kvm,
1345
struct kvm_s390_vm_tod_clock *gtod)
1346
{
1347
union tod_clock clk;
1348
1349
preempt_disable();
1350
1351
store_tod_clock_ext(&clk);
1352
1353
gtod->tod = clk.tod + kvm->arch.epoch;
1354
gtod->epoch_idx = 0;
1355
if (test_kvm_facility(kvm, 139)) {
1356
gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1357
if (gtod->tod < clk.tod)
1358
gtod->epoch_idx += 1;
1359
}
1360
1361
preempt_enable();
1362
}
1363
1364
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1365
{
1366
struct kvm_s390_vm_tod_clock gtod;
1367
1368
memset(&gtod, 0, sizeof(gtod));
1369
kvm_s390_get_tod_clock(kvm, &gtod);
1370
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1371
return -EFAULT;
1372
1373
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1374
gtod.epoch_idx, gtod.tod);
1375
return 0;
1376
}
1377
1378
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1379
{
1380
u8 gtod_high = 0;
1381
1382
if (copy_to_user((void __user *)attr->addr, &gtod_high,
1383
sizeof(gtod_high)))
1384
return -EFAULT;
1385
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1386
1387
return 0;
1388
}
1389
1390
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1391
{
1392
u64 gtod;
1393
1394
gtod = kvm_s390_get_tod_clock_fast(kvm);
1395
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1396
return -EFAULT;
1397
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1398
1399
return 0;
1400
}
1401
1402
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1403
{
1404
int ret;
1405
1406
if (attr->flags)
1407
return -EINVAL;
1408
1409
switch (attr->attr) {
1410
case KVM_S390_VM_TOD_EXT:
1411
ret = kvm_s390_get_tod_ext(kvm, attr);
1412
break;
1413
case KVM_S390_VM_TOD_HIGH:
1414
ret = kvm_s390_get_tod_high(kvm, attr);
1415
break;
1416
case KVM_S390_VM_TOD_LOW:
1417
ret = kvm_s390_get_tod_low(kvm, attr);
1418
break;
1419
default:
1420
ret = -ENXIO;
1421
break;
1422
}
1423
return ret;
1424
}
1425
1426
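/*
 * KVM_S390_VM_CPU_PROCESSOR setter: copy the userspace-provided cpuid, IBC
 * value (clamped to the host's lowest/unblocked IBC range) and facility list
 * into the VM model. Only allowed before the first vcpu is created.
 */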
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1427
{
1428
struct kvm_s390_vm_cpu_processor *proc;
1429
u16 lowest_ibc, unblocked_ibc;
1430
int ret = 0;
1431
1432
mutex_lock(&kvm->lock);
1433
if (kvm->created_vcpus) {
1434
ret = -EBUSY;
1435
goto out;
1436
}
1437
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1438
if (!proc) {
1439
ret = -ENOMEM;
1440
goto out;
1441
}
1442
if (!copy_from_user(proc, (void __user *)attr->addr,
1443
sizeof(*proc))) {
1444
kvm->arch.model.cpuid = proc->cpuid;
1445
lowest_ibc = sclp.ibc >> 16 & 0xfff;
1446
unblocked_ibc = sclp.ibc & 0xfff;
1447
if (lowest_ibc && proc->ibc) {
1448
if (proc->ibc > unblocked_ibc)
1449
kvm->arch.model.ibc = unblocked_ibc;
1450
else if (proc->ibc < lowest_ibc)
1451
kvm->arch.model.ibc = lowest_ibc;
1452
else
1453
kvm->arch.model.ibc = proc->ibc;
1454
}
1455
memcpy(kvm->arch.model.fac_list, proc->fac_list,
1456
S390_ARCH_FAC_LIST_SIZE_BYTE);
1457
VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458
kvm->arch.model.ibc,
1459
kvm->arch.model.cpuid);
1460
VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461
kvm->arch.model.fac_list[0],
1462
kvm->arch.model.fac_list[1],
1463
kvm->arch.model.fac_list[2]);
1464
} else
1465
ret = -EFAULT;
1466
kfree(proc);
1467
out:
1468
mutex_unlock(&kvm->lock);
1469
return ret;
1470
}
1471
1472
static int kvm_s390_set_processor_feat(struct kvm *kvm,
1473
struct kvm_device_attr *attr)
1474
{
1475
struct kvm_s390_vm_cpu_feat data;
1476
1477
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1478
return -EFAULT;
1479
if (!bitmap_subset((unsigned long *) data.feat,
1480
kvm_s390_available_cpu_feat,
1481
KVM_S390_VM_CPU_FEAT_NR_BITS))
1482
return -EINVAL;
1483
1484
mutex_lock(&kvm->lock);
1485
if (kvm->created_vcpus) {
1486
mutex_unlock(&kvm->lock);
1487
return -EBUSY;
1488
}
1489
bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1490
mutex_unlock(&kvm->lock);
1491
VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492
data.feat[0],
1493
data.feat[1],
1494
data.feat[2]);
1495
return 0;
1496
}
1497
1498
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1499
struct kvm_device_attr *attr)
1500
{
1501
mutex_lock(&kvm->lock);
1502
if (kvm->created_vcpus) {
1503
mutex_unlock(&kvm->lock);
1504
return -EBUSY;
1505
}
1506
1507
if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1508
sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1509
mutex_unlock(&kvm->lock);
1510
return -EFAULT;
1511
}
1512
mutex_unlock(&kvm->lock);
1513
1514
VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1515
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1516
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1517
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1518
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1519
VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1520
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1521
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1522
VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1523
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1524
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1525
VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1526
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1527
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1528
VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1529
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1530
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1531
VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1532
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1533
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1534
VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1535
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1536
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1537
VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1538
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1539
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1540
VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1541
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1542
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1543
VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1544
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1545
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1546
VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1547
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1548
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1549
VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1550
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1551
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1552
VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1553
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1554
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1555
VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1556
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1557
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1558
VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1559
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1560
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1561
VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1562
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1563
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1564
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1565
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1566
VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1567
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1568
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1569
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1570
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1571
VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
1572
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1573
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1574
1575
return 0;
1576
}
1577
1578
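/* UV feature bits (AP and AP interrupt) that userspace may enable for guests. */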
#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
1579
( \
1580
((struct kvm_s390_vm_cpu_uv_feat){ \
1581
.ap = 1, \
1582
.ap_intr = 1, \
1583
}) \
1584
.feat \
1585
)
1586
1587
static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1588
{
1589
struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
1590
unsigned long data, filter;
1591
1592
filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1593
if (get_user(data, &ptr->feat))
1594
return -EFAULT;
1595
if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
1596
return -EINVAL;
1597
1598
mutex_lock(&kvm->lock);
1599
if (kvm->created_vcpus) {
1600
mutex_unlock(&kvm->lock);
1601
return -EBUSY;
1602
}
1603
kvm->arch.model.uv_feat_guest.feat = data;
1604
mutex_unlock(&kvm->lock);
1605
1606
VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
1607
1608
return 0;
1609
}
1610
1611
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1612
{
1613
int ret = -ENXIO;
1614
1615
switch (attr->attr) {
1616
case KVM_S390_VM_CPU_PROCESSOR:
1617
ret = kvm_s390_set_processor(kvm, attr);
1618
break;
1619
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1620
ret = kvm_s390_set_processor_feat(kvm, attr);
1621
break;
1622
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1623
ret = kvm_s390_set_processor_subfunc(kvm, attr);
1624
break;
1625
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1626
ret = kvm_s390_set_uv_feat(kvm, attr);
1627
break;
1628
}
1629
return ret;
1630
}
1631
1632
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1633
{
1634
struct kvm_s390_vm_cpu_processor *proc;
1635
int ret = 0;
1636
1637
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1638
if (!proc) {
1639
ret = -ENOMEM;
1640
goto out;
1641
}
1642
proc->cpuid = kvm->arch.model.cpuid;
1643
proc->ibc = kvm->arch.model.ibc;
1644
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1645
S390_ARCH_FAC_LIST_SIZE_BYTE);
1646
VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1647
kvm->arch.model.ibc,
1648
kvm->arch.model.cpuid);
1649
VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1650
kvm->arch.model.fac_list[0],
1651
kvm->arch.model.fac_list[1],
1652
kvm->arch.model.fac_list[2]);
1653
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1654
ret = -EFAULT;
1655
kfree(proc);
1656
out:
1657
return ret;
1658
}
1659
1660
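/*
 * KVM_S390_VM_CPU_MACHINE getter: report host-level model data (host cpuid,
 * SCLP IBC and the full host facility mask/list), as opposed to the per-VM
 * processor data returned by kvm_s390_get_processor().
 */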
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1661
{
1662
struct kvm_s390_vm_cpu_machine *mach;
1663
int ret = 0;
1664
1665
mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1666
if (!mach) {
1667
ret = -ENOMEM;
1668
goto out;
1669
}
1670
get_cpu_id((struct cpuid *) &mach->cpuid);
1671
mach->ibc = sclp.ibc;
1672
memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1673
S390_ARCH_FAC_LIST_SIZE_BYTE);
1674
memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1675
sizeof(stfle_fac_list));
1676
VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1677
kvm->arch.model.ibc,
1678
kvm->arch.model.cpuid);
1679
VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1680
mach->fac_mask[0],
1681
mach->fac_mask[1],
1682
mach->fac_mask[2]);
1683
VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1684
mach->fac_list[0],
1685
mach->fac_list[1],
1686
mach->fac_list[2]);
1687
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1688
ret = -EFAULT;
1689
kfree(mach);
1690
out:
1691
return ret;
1692
}
1693
1694
static int kvm_s390_get_processor_feat(struct kvm *kvm,
1695
struct kvm_device_attr *attr)
1696
{
1697
struct kvm_s390_vm_cpu_feat data;
1698
1699
bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1700
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1701
return -EFAULT;
1702
VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1703
data.feat[0],
1704
data.feat[1],
1705
data.feat[2]);
1706
return 0;
1707
}
1708
1709
static int kvm_s390_get_machine_feat(struct kvm *kvm,
1710
struct kvm_device_attr *attr)
1711
{
1712
struct kvm_s390_vm_cpu_feat data;
1713
1714
bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1715
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1716
return -EFAULT;
1717
VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1718
data.feat[0],
1719
data.feat[1],
1720
data.feat[2]);
1721
return 0;
1722
}
1723
1724
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1725
struct kvm_device_attr *attr)
1726
{
1727
if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1728
sizeof(struct kvm_s390_vm_cpu_subfunc)))
1729
return -EFAULT;
1730
1731
VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1732
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1733
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1734
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1735
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1736
VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1737
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1738
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1739
VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1740
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1741
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1742
VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1743
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1744
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1745
VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1746
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1747
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1748
VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1749
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1750
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1751
VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1752
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1753
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1754
VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1755
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1756
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1757
VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1758
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1759
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1760
VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1761
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1762
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1763
VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1764
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1765
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1766
VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1767
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1768
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1769
VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1770
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1771
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1772
VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1773
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1774
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1775
VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1776
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1777
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1778
VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1779
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1780
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1781
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1782
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1783
VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1784
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1785
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1786
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1787
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1788
VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
1789
((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
1790
((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1791
1792
return 0;
1793
}
1794
1795
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1796
struct kvm_device_attr *attr)
1797
{
1798
if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1799
sizeof(struct kvm_s390_vm_cpu_subfunc)))
1800
return -EFAULT;
1801
1802
VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1803
((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1804
((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1805
((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1806
((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1807
VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1808
((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1809
((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1810
VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1811
((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1812
((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1813
VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1814
((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1815
((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1816
VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1817
((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1818
((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1819
VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1820
((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1821
((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1822
VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1823
((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1824
((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1825
VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1826
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1827
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1828
VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1829
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1830
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1831
VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1832
((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1833
((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1834
VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1835
((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1836
((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1837
VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1838
((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1839
((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1840
VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1841
((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1842
((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1843
VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1844
((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1845
((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1846
VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1847
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1848
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1849
VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1850
((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1851
((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1852
((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1853
((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1854
VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1855
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1856
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1857
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1858
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1859
VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
1860
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1861
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1862
1863
return 0;
1864
}
1865
1866
static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1867
{
1868
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1869
unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
1870
1871
if (put_user(feat, &dst->feat))
1872
return -EFAULT;
1873
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1874
1875
return 0;
1876
}
1877
1878
static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1879
{
1880
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1881
unsigned long feat;
1882
1883
BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
1884
1885
feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1886
if (put_user(feat, &dst->feat))
1887
return -EFAULT;
1888
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1889
1890
return 0;
1891
}
1892
1893
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1894
{
1895
int ret = -ENXIO;
1896
1897
switch (attr->attr) {
1898
case KVM_S390_VM_CPU_PROCESSOR:
1899
ret = kvm_s390_get_processor(kvm, attr);
1900
break;
1901
case KVM_S390_VM_CPU_MACHINE:
1902
ret = kvm_s390_get_machine(kvm, attr);
1903
break;
1904
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1905
ret = kvm_s390_get_processor_feat(kvm, attr);
1906
break;
1907
case KVM_S390_VM_CPU_MACHINE_FEAT:
1908
ret = kvm_s390_get_machine_feat(kvm, attr);
1909
break;
1910
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1911
ret = kvm_s390_get_processor_subfunc(kvm, attr);
1912
break;
1913
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1914
ret = kvm_s390_get_machine_subfunc(kvm, attr);
1915
break;
1916
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1917
ret = kvm_s390_get_processor_uv_feat(kvm, attr);
1918
break;
1919
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
1920
ret = kvm_s390_get_machine_uv_feat(kvm, attr);
1921
break;
1922
}
1923
return ret;
1924
}
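/*
 * Illustrative userspace sketch (vm_fd is assumed to be a VM file descriptor
 * from KVM_CREATE_VM; error handling is omitted): the CPU-model attributes
 * handled above are reached through the generic device-attribute ioctls on
 * the VM fd, e.g. to read the host's available guest CPU features:
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr  = (__u64)&feat,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * KVM_HAS_DEVICE_ATTR with the same group/attr pair can be used beforehand
 * to probe whether an attribute is supported at all.
 */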
1925
1926
/**
1927
* kvm_s390_update_topology_change_report - update CPU topology change report
1928
* @kvm: guest KVM description
1929
* @val: set or clear the MTCR bit
1930
*
1931
* Updates the Multiprocessor Topology-Change-Report bit to signal
1932
* the guest with a topology change.
1933
* This is only relevant if the topology facility is present.
1934
*
1935
* The SCA version, bsca or esca, doesn't matter as the offset is the same.
1936
*/
1937
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1938
{
1939
union sca_utility new, old;
1940
struct bsca_block *sca;
1941
1942
read_lock(&kvm->arch.sca_lock);
1943
sca = kvm->arch.sca;
1944
old = READ_ONCE(sca->utility);
1945
do {
1946
new = old;
1947
new.mtcr = val;
1948
} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
1949
read_unlock(&kvm->arch.sca_lock);
1950
}
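/*
 * Note on the update above: the SCA read lock only prevents the SCA from
 * being replaced (a bsca may be swapped for an esca concurrently), while the
 * MTCR bit itself is updated with a try_cmpxchg() retry loop so that
 * concurrent writers to the utility word cannot lose each other's updates.
 */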
1951
1952
static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1953
struct kvm_device_attr *attr)
1954
{
1955
if (!test_kvm_facility(kvm, 11))
1956
return -ENXIO;
1957
1958
kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1959
return 0;
1960
}
1961
1962
static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1963
struct kvm_device_attr *attr)
1964
{
1965
u8 topo;
1966
1967
if (!test_kvm_facility(kvm, 11))
1968
return -ENXIO;
1969
1970
read_lock(&kvm->arch.sca_lock);
1971
topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1972
read_unlock(&kvm->arch.sca_lock);
1973
1974
return put_user(topo, (u8 __user *)attr->addr);
1975
}
1976
1977
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1978
{
1979
int ret;
1980
1981
switch (attr->group) {
1982
case KVM_S390_VM_MEM_CTRL:
1983
ret = kvm_s390_set_mem_control(kvm, attr);
1984
break;
1985
case KVM_S390_VM_TOD:
1986
ret = kvm_s390_set_tod(kvm, attr);
1987
break;
1988
case KVM_S390_VM_CPU_MODEL:
1989
ret = kvm_s390_set_cpu_model(kvm, attr);
1990
break;
1991
case KVM_S390_VM_CRYPTO:
1992
ret = kvm_s390_vm_set_crypto(kvm, attr);
1993
break;
1994
case KVM_S390_VM_MIGRATION:
1995
ret = kvm_s390_vm_set_migration(kvm, attr);
1996
break;
1997
case KVM_S390_VM_CPU_TOPOLOGY:
1998
ret = kvm_s390_set_topo_change_indication(kvm, attr);
1999
break;
2000
default:
2001
ret = -ENXIO;
2002
break;
2003
}
2004
2005
return ret;
2006
}
2007
2008
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2009
{
2010
int ret;
2011
2012
switch (attr->group) {
2013
case KVM_S390_VM_MEM_CTRL:
2014
ret = kvm_s390_get_mem_control(kvm, attr);
2015
break;
2016
case KVM_S390_VM_TOD:
2017
ret = kvm_s390_get_tod(kvm, attr);
2018
break;
2019
case KVM_S390_VM_CPU_MODEL:
2020
ret = kvm_s390_get_cpu_model(kvm, attr);
2021
break;
2022
case KVM_S390_VM_MIGRATION:
2023
ret = kvm_s390_vm_get_migration(kvm, attr);
2024
break;
2025
case KVM_S390_VM_CPU_TOPOLOGY:
2026
ret = kvm_s390_get_topo_change_indication(kvm, attr);
2027
break;
2028
default:
2029
ret = -ENXIO;
2030
break;
2031
}
2032
2033
return ret;
2034
}
2035
2036
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2037
{
2038
int ret;
2039
2040
switch (attr->group) {
2041
case KVM_S390_VM_MEM_CTRL:
2042
switch (attr->attr) {
2043
case KVM_S390_VM_MEM_ENABLE_CMMA:
2044
case KVM_S390_VM_MEM_CLR_CMMA:
2045
ret = sclp.has_cmma ? 0 : -ENXIO;
2046
break;
2047
case KVM_S390_VM_MEM_LIMIT_SIZE:
2048
ret = 0;
2049
break;
2050
default:
2051
ret = -ENXIO;
2052
break;
2053
}
2054
break;
2055
case KVM_S390_VM_TOD:
2056
switch (attr->attr) {
2057
case KVM_S390_VM_TOD_LOW:
2058
case KVM_S390_VM_TOD_HIGH:
2059
ret = 0;
2060
break;
2061
default:
2062
ret = -ENXIO;
2063
break;
2064
}
2065
break;
2066
case KVM_S390_VM_CPU_MODEL:
2067
switch (attr->attr) {
2068
case KVM_S390_VM_CPU_PROCESSOR:
2069
case KVM_S390_VM_CPU_MACHINE:
2070
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
2071
case KVM_S390_VM_CPU_MACHINE_FEAT:
2072
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2073
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
2074
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
2075
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
2076
ret = 0;
2077
break;
2078
default:
2079
ret = -ENXIO;
2080
break;
2081
}
2082
break;
2083
case KVM_S390_VM_CRYPTO:
2084
switch (attr->attr) {
2085
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
2086
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
2087
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
2088
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
2089
ret = 0;
2090
break;
2091
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
2092
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
2093
ret = ap_instructions_available() ? 0 : -ENXIO;
2094
break;
2095
default:
2096
ret = -ENXIO;
2097
break;
2098
}
2099
break;
2100
case KVM_S390_VM_MIGRATION:
2101
ret = 0;
2102
break;
2103
case KVM_S390_VM_CPU_TOPOLOGY:
2104
ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
2105
break;
2106
default:
2107
ret = -ENXIO;
2108
break;
2109
}
2110
2111
return ret;
2112
}
2113
2114
static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2115
{
2116
uint8_t *keys;
2117
uint64_t hva;
2118
int srcu_idx, i, r = 0;
2119
2120
if (args->flags != 0)
2121
return -EINVAL;
2122
2123
/* Is this guest using storage keys? */
2124
if (!mm_uses_skeys(current->mm))
2125
return KVM_S390_GET_SKEYS_NONE;
2126
2127
/* Enforce sane limit on memory allocation */
2128
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2129
return -EINVAL;
2130
2131
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2132
if (!keys)
2133
return -ENOMEM;
2134
2135
mmap_read_lock(current->mm);
2136
srcu_idx = srcu_read_lock(&kvm->srcu);
2137
for (i = 0; i < args->count; i++) {
2138
hva = gfn_to_hva(kvm, args->start_gfn + i);
2139
if (kvm_is_error_hva(hva)) {
2140
r = -EFAULT;
2141
break;
2142
}
2143
2144
r = get_guest_storage_key(current->mm, hva, &keys[i]);
2145
if (r)
2146
break;
2147
}
2148
srcu_read_unlock(&kvm->srcu, srcu_idx);
2149
mmap_read_unlock(current->mm);
2150
2151
if (!r) {
2152
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2153
sizeof(uint8_t) * args->count);
2154
if (r)
2155
r = -EFAULT;
2156
}
2157
2158
kvfree(keys);
2159
return r;
2160
}
2161
2162
static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2163
{
2164
uint8_t *keys;
2165
uint64_t hva;
2166
int srcu_idx, i, r = 0;
2167
bool unlocked;
2168
2169
if (args->flags != 0)
2170
return -EINVAL;
2171
2172
/* Enforce sane limit on memory allocation */
2173
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2174
return -EINVAL;
2175
2176
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2177
if (!keys)
2178
return -ENOMEM;
2179
2180
r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2181
sizeof(uint8_t) * args->count);
2182
if (r) {
2183
r = -EFAULT;
2184
goto out;
2185
}
2186
2187
/* Enable storage key handling for the guest */
2188
r = s390_enable_skey();
2189
if (r)
2190
goto out;
2191
2192
i = 0;
2193
mmap_read_lock(current->mm);
2194
srcu_idx = srcu_read_lock(&kvm->srcu);
2195
while (i < args->count) {
2196
unlocked = false;
2197
hva = gfn_to_hva(kvm, args->start_gfn + i);
2198
if (kvm_is_error_hva(hva)) {
2199
r = -EFAULT;
2200
break;
2201
}
2202
2203
/* Lowest order bit is reserved */
2204
if (keys[i] & 0x01) {
2205
r = -EINVAL;
2206
break;
2207
}
2208
2209
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2210
if (r) {
2211
r = fixup_user_fault(current->mm, hva,
2212
FAULT_FLAG_WRITE, &unlocked);
2213
if (r)
2214
break;
2215
}
2216
if (!r)
2217
i++;
2218
}
2219
srcu_read_unlock(&kvm->srcu, srcu_idx);
2220
mmap_read_unlock(current->mm);
2221
out:
2222
kvfree(keys);
2223
return r;
2224
}
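/*
 * Illustrative sketch of the matching userspace side (vm_fd is assumed to be
 * a VM file descriptor; error handling is omitted): storage keys are read
 * and written in bulk through KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS with
 * one key byte per guest page:
 *
 *	uint8_t keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 128,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE tells userspace that the guest
 * never used storage keys, so there is nothing to transfer.
 */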
2225
2226
/*
2227
* Base address and length must be sent at the start of each block, therefore
2228
* it's cheaper to send some clean data, as long as it's less than the size of
2229
* two longs.
2230
*/
2231
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2232
/* for consistency */
2233
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
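/*
 * Worked example for the comment above: 2 * sizeof(void *) is 16 on s390x,
 * so a run of up to 15 clean values between two dirty ones is still stored
 * in the current block; describing a fresh block (its base address and
 * length) would cost about as much data as simply sending the clean bytes.
 */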
2234
2235
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2236
u8 *res, unsigned long bufsize)
2237
{
2238
unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2239
2240
args->count = 0;
2241
while (args->count < bufsize) {
2242
hva = gfn_to_hva(kvm, cur_gfn);
2243
/*
2244
* We return an error if the first value was invalid, but we
2245
* return successfully if at least one value was copied.
2246
*/
2247
if (kvm_is_error_hva(hva))
2248
return args->count ? 0 : -EFAULT;
2249
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2250
pgstev = 0;
2251
res[args->count++] = (pgstev >> 24) & 0x43;
2252
cur_gfn++;
2253
}
2254
2255
return 0;
2256
}
2257
2258
static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2259
gfn_t gfn)
2260
{
2261
return ____gfn_to_memslot(slots, gfn, true);
2262
}
2263
2264
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2265
unsigned long cur_gfn)
2266
{
2267
struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2268
unsigned long ofs = cur_gfn - ms->base_gfn;
2269
struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2270
2271
if (ms->base_gfn + ms->npages <= cur_gfn) {
2272
mnode = rb_next(mnode);
2273
/* If we are above the highest slot, wrap around */
2274
if (!mnode)
2275
mnode = rb_first(&slots->gfn_tree);
2276
2277
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2278
ofs = 0;
2279
}
2280
2281
if (cur_gfn < ms->base_gfn)
2282
ofs = 0;
2283
2284
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2285
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2286
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2287
ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2288
}
2289
return ms->base_gfn + ofs;
2290
}
2291
2292
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2293
u8 *res, unsigned long bufsize)
2294
{
2295
unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2296
struct kvm_memslots *slots = kvm_memslots(kvm);
2297
struct kvm_memory_slot *ms;
2298
2299
if (unlikely(kvm_memslots_empty(slots)))
2300
return 0;
2301
2302
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2303
ms = gfn_to_memslot(kvm, cur_gfn);
2304
args->count = 0;
2305
args->start_gfn = cur_gfn;
2306
if (!ms)
2307
return 0;
2308
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2309
mem_end = kvm_s390_get_gfn_end(slots);
2310
2311
while (args->count < bufsize) {
2312
hva = gfn_to_hva(kvm, cur_gfn);
2313
if (kvm_is_error_hva(hva))
2314
return 0;
2315
/* Decrement only if we actually flipped the bit to 0 */
2316
if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2317
atomic64_dec(&kvm->arch.cmma_dirty_pages);
2318
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2319
pgstev = 0;
2320
/* Save the value */
2321
res[args->count++] = (pgstev >> 24) & 0x43;
2322
/* If the next bit is too far away, stop. */
2323
if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2324
return 0;
2325
/* If we reached the previous "next", find the next one */
2326
if (cur_gfn == next_gfn)
2327
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2328
/* Reached the end of memory or of the buffer, stop */
2329
if ((next_gfn >= mem_end) ||
2330
(next_gfn - args->start_gfn >= bufsize))
2331
return 0;
2332
cur_gfn++;
2333
/* Reached the end of the current memslot, take the next one. */
2334
if (cur_gfn - ms->base_gfn >= ms->npages) {
2335
ms = gfn_to_memslot(kvm, cur_gfn);
2336
if (!ms)
2337
return 0;
2338
}
2339
}
2340
return 0;
2341
}
2342
2343
/*
2344
* This function searches for the next page with dirty CMMA attributes, and
2345
* saves the attributes in the buffer up to either the end of the buffer or
2346
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2347
* no trailing clean bytes are saved.
2348
* In case no dirty bits were found, or if CMMA was not enabled or used, the
2349
* output buffer will indicate 0 as length.
2350
*/
2351
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2352
struct kvm_s390_cmma_log *args)
2353
{
2354
unsigned long bufsize;
2355
int srcu_idx, peek, ret;
2356
u8 *values;
2357
2358
if (!kvm->arch.use_cmma)
2359
return -ENXIO;
2360
/* Invalid/unsupported flags were specified */
2361
if (args->flags & ~KVM_S390_CMMA_PEEK)
2362
return -EINVAL;
2363
/* Migration mode query, and we are not doing a migration */
2364
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2365
if (!peek && !kvm->arch.migration_mode)
2366
return -EINVAL;
2367
/* CMMA is disabled or was not used, or the buffer has length zero */
2368
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2369
if (!bufsize || !kvm->mm->context.uses_cmm) {
2370
memset(args, 0, sizeof(*args));
2371
return 0;
2372
}
2373
/* We are not peeking, and there are no dirty pages */
2374
if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2375
memset(args, 0, sizeof(*args));
2376
return 0;
2377
}
2378
2379
values = vmalloc(bufsize);
2380
if (!values)
2381
return -ENOMEM;
2382
2383
mmap_read_lock(kvm->mm);
2384
srcu_idx = srcu_read_lock(&kvm->srcu);
2385
if (peek)
2386
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2387
else
2388
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2389
srcu_read_unlock(&kvm->srcu, srcu_idx);
2390
mmap_read_unlock(kvm->mm);
2391
2392
if (kvm->arch.migration_mode)
2393
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2394
else
2395
args->remaining = 0;
2396
2397
if (copy_to_user((void __user *)args->values, values, args->count))
2398
ret = -EFAULT;
2399
2400
vfree(values);
2401
return ret;
2402
}
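/*
 * Illustrative sketch of the caller side (vm_fd is assumed to be a VM file
 * descriptor, the buffer size is arbitrary, error handling is omitted):
 * during migration userspace repeatedly pulls CMMA values until "remaining"
 * drops to zero:
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.flags = 0,
 *		.values = (__u64)buf,
 *	};
 *	do {
 *		log.count = sizeof(buf);
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 *
 * With KVM_S390_CMMA_PEEK set, the values starting at start_gfn are read
 * unconditionally and no dirty bits are consumed.
 */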
2403
2404
/*
2405
* This function sets the CMMA attributes for the given pages. If the input
2406
* buffer has zero length, no action is taken, otherwise the attributes are
2407
* set and the mm->context.uses_cmm flag is set.
2408
*/
2409
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2410
const struct kvm_s390_cmma_log *args)
2411
{
2412
unsigned long hva, mask, pgstev, i;
2413
uint8_t *bits;
2414
int srcu_idx, r = 0;
2415
2416
mask = args->mask;
2417
2418
if (!kvm->arch.use_cmma)
2419
return -ENXIO;
2420
/* invalid/unsupported flags */
2421
if (args->flags != 0)
2422
return -EINVAL;
2423
/* Enforce sane limit on memory allocation */
2424
if (args->count > KVM_S390_CMMA_SIZE_MAX)
2425
return -EINVAL;
2426
/* Nothing to do */
2427
if (args->count == 0)
2428
return 0;
2429
2430
bits = vmalloc(array_size(sizeof(*bits), args->count));
2431
if (!bits)
2432
return -ENOMEM;
2433
2434
r = copy_from_user(bits, (void __user *)args->values, args->count);
2435
if (r) {
2436
r = -EFAULT;
2437
goto out;
2438
}
2439
2440
mmap_read_lock(kvm->mm);
2441
srcu_idx = srcu_read_lock(&kvm->srcu);
2442
for (i = 0; i < args->count; i++) {
2443
hva = gfn_to_hva(kvm, args->start_gfn + i);
2444
if (kvm_is_error_hva(hva)) {
2445
r = -EFAULT;
2446
break;
2447
}
2448
2449
pgstev = bits[i];
2450
pgstev = pgstev << 24;
2451
mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2452
set_pgste_bits(kvm->mm, hva, mask, pgstev);
2453
}
2454
srcu_read_unlock(&kvm->srcu, srcu_idx);
2455
mmap_read_unlock(kvm->mm);
2456
2457
if (!kvm->mm->context.uses_cmm) {
2458
mmap_write_lock(kvm->mm);
2459
kvm->mm->context.uses_cmm = 1;
2460
mmap_write_unlock(kvm->mm);
2461
}
2462
out:
2463
vfree(bits);
2464
return r;
2465
}
2466
2467
/**
2468
* kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2469
* non protected.
2470
* @kvm: the VM whose protected vCPUs are to be converted
2471
* @rc: return value for the RC field of the UVC (in case of error)
2472
* @rrc: return value for the RRC field of the UVC (in case of error)
2473
*
2474
* Does not stop in case of error, tries to convert as many
2475
* CPUs as possible. In case of error, the RC and RRC of the first error are
2476
* returned.
2477
*
2478
* Return: 0 in case of success, otherwise -EIO
2479
*/
2480
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2481
{
2482
struct kvm_vcpu *vcpu;
2483
unsigned long i;
2484
u16 _rc, _rrc;
2485
int ret = 0;
2486
2487
/*
2488
* We ignore failures and try to destroy as many CPUs as possible.
2489
* At the same time we must not free the assigned resources when
2490
* this fails, as the ultravisor still has access to that memory.
2491
* So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2492
* behind.
2493
* We want to return the first failure rc and rrc, though.
2494
*/
2495
kvm_for_each_vcpu(i, vcpu, kvm) {
2496
mutex_lock(&vcpu->mutex);
2497
if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2498
*rc = _rc;
2499
*rrc = _rrc;
2500
ret = -EIO;
2501
}
2502
mutex_unlock(&vcpu->mutex);
2503
}
2504
/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2505
if (use_gisa)
2506
kvm_s390_gisa_enable(kvm);
2507
return ret;
2508
}
2509
2510
/**
2511
* kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2512
* to protected.
2513
* @kvm: the VM whose protected vCPUs are to be converted
2514
* @rc: return value for the RC field of the UVC (in case of error)
2515
* @rrc: return value for the RRC field of the UVC (in case of error)
2516
*
2517
* Tries to undo the conversion in case of error.
2518
*
2519
* Return: 0 in case of success, otherwise -EIO
2520
*/
2521
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2522
{
2523
unsigned long i;
2524
int r = 0;
2525
u16 dummy;
2526
2527
struct kvm_vcpu *vcpu;
2528
2529
/* Disable the GISA if the ultravisor does not support AIV. */
2530
if (!uv_has_feature(BIT_UV_FEAT_AIV))
2531
kvm_s390_gisa_disable(kvm);
2532
2533
kvm_for_each_vcpu(i, vcpu, kvm) {
2534
mutex_lock(&vcpu->mutex);
2535
r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2536
mutex_unlock(&vcpu->mutex);
2537
if (r)
2538
break;
2539
}
2540
if (r)
2541
kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2542
return r;
2543
}
2544
2545
/*
2546
* Here we provide user space with a direct interface to query UV
2547
* related data like UV maxima and available features as well as
2548
* feature specific data.
2549
*
2550
* To facilitate future extension of the data structures we'll try to
2551
* write data up to the maximum requested length.
2552
*/
2553
static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2554
{
2555
ssize_t len_min;
2556
2557
switch (info->header.id) {
2558
case KVM_PV_INFO_VM: {
2559
len_min = sizeof(info->header) + sizeof(info->vm);
2560
2561
if (info->header.len_max < len_min)
2562
return -EINVAL;
2563
2564
memcpy(info->vm.inst_calls_list,
2565
uv_info.inst_calls_list,
2566
sizeof(uv_info.inst_calls_list));
2567
2568
/* It's max cpuid not max cpus, so it's off by one */
2569
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2570
info->vm.max_guests = uv_info.max_num_sec_conf;
2571
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2572
info->vm.feature_indication = uv_info.uv_feature_indications;
2573
2574
return len_min;
2575
}
2576
case KVM_PV_INFO_DUMP: {
2577
len_min = sizeof(info->header) + sizeof(info->dump);
2578
2579
if (info->header.len_max < len_min)
2580
return -EINVAL;
2581
2582
info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2583
info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2584
info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2585
return len_min;
2586
}
2587
default:
2588
return -EINVAL;
2589
}
2590
}
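/*
 * The length handshake above: userspace fills in header.len_max with the
 * size of its buffer, the handler rejects buffers that cannot hold even the
 * fixed part of the requested reply, and on success it returns the number
 * of valid bytes, which the caller stores in header.len_written before
 * copying the result back. Future kernels can thus append fields to the
 * info structures without breaking existing userspace.
 */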
2591
2592
static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2593
struct kvm_s390_pv_dmp dmp)
2594
{
2595
int r = -EINVAL;
2596
void __user *result_buff = (void __user *)dmp.buff_addr;
2597
2598
switch (dmp.subcmd) {
2599
case KVM_PV_DUMP_INIT: {
2600
if (kvm->arch.pv.dumping)
2601
break;
2602
2603
/*
2604
* Block SIE entry as concurrent dump UVCs could lead
2605
* to validities.
2606
*/
2607
kvm_s390_vcpu_block_all(kvm);
2608
2609
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2610
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2611
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2612
cmd->rc, cmd->rrc);
2613
if (!r) {
2614
kvm->arch.pv.dumping = true;
2615
} else {
2616
kvm_s390_vcpu_unblock_all(kvm);
2617
r = -EINVAL;
2618
}
2619
break;
2620
}
2621
case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2622
if (!kvm->arch.pv.dumping)
2623
break;
2624
2625
/*
2626
* gaddr is an output parameter since we might stop
2627
* early. As dmp will be copied back in our caller, we
2628
* don't need to do it ourselves.
2629
*/
2630
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2631
&cmd->rc, &cmd->rrc);
2632
break;
2633
}
2634
case KVM_PV_DUMP_COMPLETE: {
2635
if (!kvm->arch.pv.dumping)
2636
break;
2637
2638
r = -EINVAL;
2639
if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2640
break;
2641
2642
r = kvm_s390_pv_dump_complete(kvm, result_buff,
2643
&cmd->rc, &cmd->rrc);
2644
break;
2645
}
2646
default:
2647
r = -ENOTTY;
2648
break;
2649
}
2650
2651
return r;
2652
}
2653
2654
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2655
{
2656
const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2657
void __user *argp = (void __user *)cmd->data;
2658
int r = 0;
2659
u16 dummy;
2660
2661
if (need_lock)
2662
mutex_lock(&kvm->lock);
2663
2664
switch (cmd->cmd) {
2665
case KVM_PV_ENABLE: {
2666
r = -EINVAL;
2667
if (kvm_s390_pv_is_protected(kvm))
2668
break;
2669
2670
/*
2671
* FMT 4 SIE needs esca. As we never switch back to bsca from
2672
* esca, we need no cleanup in the error cases below
2673
*/
2674
r = sca_switch_to_extended(kvm);
2675
if (r)
2676
break;
2677
2678
mmap_write_lock(kvm->mm);
2679
r = gmap_helper_disable_cow_sharing();
2680
mmap_write_unlock(kvm->mm);
2681
if (r)
2682
break;
2683
2684
r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2685
if (r)
2686
break;
2687
2688
r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2689
if (r)
2690
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2691
2692
/* we need to block service interrupts from now on */
2693
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2694
break;
2695
}
2696
case KVM_PV_ASYNC_CLEANUP_PREPARE:
2697
r = -EINVAL;
2698
if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2699
break;
2700
2701
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2702
/*
2703
* If a CPU could not be destroyed, destroy VM will also fail.
2704
* There is no point in trying to destroy it. Instead return
2705
* the rc and rrc from the first CPU that failed destroying.
2706
*/
2707
if (r)
2708
break;
2709
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2710
2711
/* no need to block service interrupts any more */
2712
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2713
break;
2714
case KVM_PV_ASYNC_CLEANUP_PERFORM:
2715
r = -EINVAL;
2716
if (!async_destroy)
2717
break;
2718
/* kvm->lock must not be held; this is asserted inside the function. */
2719
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2720
break;
2721
case KVM_PV_DISABLE: {
2722
r = -EINVAL;
2723
if (!kvm_s390_pv_is_protected(kvm))
2724
break;
2725
2726
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2727
/*
2728
* If a CPU could not be destroyed, destroy VM will also fail.
2729
* There is no point in trying to destroy it. Instead return
2730
* the rc and rrc from the first CPU that failed destroying.
2731
*/
2732
if (r)
2733
break;
2734
r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2735
2736
/* no need to block service interrupts any more */
2737
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2738
break;
2739
}
2740
case KVM_PV_SET_SEC_PARMS: {
2741
struct kvm_s390_pv_sec_parm parms = {};
2742
void *hdr;
2743
2744
r = -EINVAL;
2745
if (!kvm_s390_pv_is_protected(kvm))
2746
break;
2747
2748
r = -EFAULT;
2749
if (copy_from_user(&parms, argp, sizeof(parms)))
2750
break;
2751
2752
/* Currently restricted to 8KB */
2753
r = -EINVAL;
2754
if (parms.length > PAGE_SIZE * 2)
2755
break;
2756
2757
r = -ENOMEM;
2758
hdr = vmalloc(parms.length);
2759
if (!hdr)
2760
break;
2761
2762
r = -EFAULT;
2763
if (!copy_from_user(hdr, (void __user *)parms.origin,
2764
parms.length))
2765
r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2766
&cmd->rc, &cmd->rrc);
2767
2768
vfree(hdr);
2769
break;
2770
}
2771
case KVM_PV_UNPACK: {
2772
struct kvm_s390_pv_unp unp = {};
2773
2774
r = -EINVAL;
2775
if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2776
break;
2777
2778
r = -EFAULT;
2779
if (copy_from_user(&unp, argp, sizeof(unp)))
2780
break;
2781
2782
r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2783
&cmd->rc, &cmd->rrc);
2784
break;
2785
}
2786
case KVM_PV_VERIFY: {
2787
r = -EINVAL;
2788
if (!kvm_s390_pv_is_protected(kvm))
2789
break;
2790
2791
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2792
UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2793
KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2794
cmd->rrc);
2795
break;
2796
}
2797
case KVM_PV_PREP_RESET: {
2798
r = -EINVAL;
2799
if (!kvm_s390_pv_is_protected(kvm))
2800
break;
2801
2802
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2803
UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2804
KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2805
cmd->rc, cmd->rrc);
2806
break;
2807
}
2808
case KVM_PV_UNSHARE_ALL: {
2809
r = -EINVAL;
2810
if (!kvm_s390_pv_is_protected(kvm))
2811
break;
2812
2813
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2814
UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2815
KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2816
cmd->rc, cmd->rrc);
2817
break;
2818
}
2819
case KVM_PV_INFO: {
2820
struct kvm_s390_pv_info info = {};
2821
ssize_t data_len;
2822
2823
/*
2824
* No need to check the VM protection here.
2825
*
2826
* Maybe user space wants to query some of the data
2827
* when the VM is still unprotected. If we see the
2828
* need to fence a new data command we can still
2829
* return an error in the info handler.
2830
*/
2831
2832
r = -EFAULT;
2833
if (copy_from_user(&info, argp, sizeof(info.header)))
2834
break;
2835
2836
r = -EINVAL;
2837
if (info.header.len_max < sizeof(info.header))
2838
break;
2839
2840
data_len = kvm_s390_handle_pv_info(&info);
2841
if (data_len < 0) {
2842
r = data_len;
2843
break;
2844
}
2845
/*
2846
* If a data command struct is extended (multiple
2847
* times) this can be used to determine how much of it
2848
* is valid.
2849
*/
2850
info.header.len_written = data_len;
2851
2852
r = -EFAULT;
2853
if (copy_to_user(argp, &info, data_len))
2854
break;
2855
2856
r = 0;
2857
break;
2858
}
2859
case KVM_PV_DUMP: {
2860
struct kvm_s390_pv_dmp dmp;
2861
2862
r = -EINVAL;
2863
if (!kvm_s390_pv_is_protected(kvm))
2864
break;
2865
2866
r = -EFAULT;
2867
if (copy_from_user(&dmp, argp, sizeof(dmp)))
2868
break;
2869
2870
r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2871
if (r)
2872
break;
2873
2874
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2875
r = -EFAULT;
2876
break;
2877
}
2878
2879
break;
2880
}
2881
default:
2882
r = -ENOTTY;
2883
}
2884
if (need_lock)
2885
mutex_unlock(&kvm->lock);
2886
2887
return r;
2888
}
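/*
 * Locking summary for the handler above: every PV command runs under
 * kvm->lock, except KVM_PV_ASYNC_CLEANUP_PERFORM, which tears down a
 * previously set-aside protected configuration and therefore must run
 * without the lock (see the need_lock check at the top and the assertion
 * inside kvm_s390_pv_deinit_aside_vm()).
 */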
2889
2890
static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
2891
{
2892
if (mop->flags & ~supported_flags || !mop->size)
2893
return -EINVAL;
2894
if (mop->size > MEM_OP_MAX_SIZE)
2895
return -E2BIG;
2896
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2897
if (mop->key > 0xf)
2898
return -EINVAL;
2899
} else {
2900
mop->key = 0;
2901
}
2902
return 0;
2903
}
2904
2905
static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2906
{
2907
void __user *uaddr = (void __user *)mop->buf;
2908
enum gacc_mode acc_mode;
2909
void *tmpbuf = NULL;
2910
int r, srcu_idx;
2911
2912
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
2913
KVM_S390_MEMOP_F_CHECK_ONLY);
2914
if (r)
2915
return r;
2916
2917
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2918
tmpbuf = vmalloc(mop->size);
2919
if (!tmpbuf)
2920
return -ENOMEM;
2921
}
2922
2923
srcu_idx = srcu_read_lock(&kvm->srcu);
2924
2925
if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2926
r = PGM_ADDRESSING;
2927
goto out_unlock;
2928
}
2929
2930
acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
2931
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2932
r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
2933
goto out_unlock;
2934
}
2935
if (acc_mode == GACC_FETCH) {
2936
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2937
mop->size, GACC_FETCH, mop->key);
2938
if (r)
2939
goto out_unlock;
2940
if (copy_to_user(uaddr, tmpbuf, mop->size))
2941
r = -EFAULT;
2942
} else {
2943
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2944
r = -EFAULT;
2945
goto out_unlock;
2946
}
2947
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2948
mop->size, GACC_STORE, mop->key);
2949
}
2950
2951
out_unlock:
2952
srcu_read_unlock(&kvm->srcu, srcu_idx);
2953
2954
vfree(tmpbuf);
2955
return r;
2956
}
2957
2958
static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2959
{
2960
void __user *uaddr = (void __user *)mop->buf;
2961
void __user *old_addr = (void __user *)mop->old_addr;
2962
union {
2963
__uint128_t quad;
2964
char raw[sizeof(__uint128_t)];
2965
} old = { .quad = 0}, new = { .quad = 0 };
2966
unsigned int off_in_quad = sizeof(new) - mop->size;
2967
int r, srcu_idx;
2968
bool success;
2969
2970
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
2971
if (r)
2972
return r;
2973
/*
2974
* This validates off_in_quad. Checking that size is a power
2975
* of two is not necessary, as cmpxchg_guest_abs_with_key
2976
* takes care of that
2977
*/
2978
if (mop->size > sizeof(new))
2979
return -EINVAL;
2980
if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
2981
return -EFAULT;
2982
if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
2983
return -EFAULT;
2984
2985
srcu_idx = srcu_read_lock(&kvm->srcu);
2986
2987
if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2988
r = PGM_ADDRESSING;
2989
goto out_unlock;
2990
}
2991
2992
r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
2993
new.quad, mop->key, &success);
2994
if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
2995
r = -EFAULT;
2996
2997
out_unlock:
2998
srcu_read_unlock(&kvm->srcu, srcu_idx);
2999
return r;
3000
}
3001
3002
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
3003
{
3004
/*
3005
* This is technically a heuristic only: if the kvm->lock is not
3006
* taken, it is not guaranteed that the vm is/remains non-protected.
3007
* This is ok from a kernel perspective, wrongdoing is detected
3008
* on the access, -EFAULT is returned and the vm may crash the
3009
* next time it accesses the memory in question.
3010
* There is no sane usecase to do switching and a memop on two
3011
* different CPUs at the same time.
3012
*/
3013
if (kvm_s390_pv_get_handle(kvm))
3014
return -EINVAL;
3015
3016
switch (mop->op) {
3017
case KVM_S390_MEMOP_ABSOLUTE_READ:
3018
case KVM_S390_MEMOP_ABSOLUTE_WRITE:
3019
return kvm_s390_vm_mem_op_abs(kvm, mop);
3020
case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
3021
return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
3022
default:
3023
return -EINVAL;
3024
}
3025
}
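/*
 * Illustrative sketch of a VM-scoped memop from userspace (vm_fd is assumed
 * to be a VM file descriptor, the guest address is made up, error handling
 * is omitted): read 256 bytes of guest absolute memory into a local buffer:
 *
 *	uint8_t buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * KVM_S390_MEMOP_F_CHECK_ONLY only checks accessibility without copying any
 * data, and KVM_S390_MEMOP_F_SKEY_PROTECTION makes the access honour the
 * storage key passed in "key".
 */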
3026
3027
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
3028
{
3029
struct kvm *kvm = filp->private_data;
3030
void __user *argp = (void __user *)arg;
3031
struct kvm_device_attr attr;
3032
int r;
3033
3034
switch (ioctl) {
3035
case KVM_S390_INTERRUPT: {
3036
struct kvm_s390_interrupt s390int;
3037
3038
r = -EFAULT;
3039
if (copy_from_user(&s390int, argp, sizeof(s390int)))
3040
break;
3041
r = kvm_s390_inject_vm(kvm, &s390int);
3042
break;
3043
}
3044
case KVM_CREATE_IRQCHIP: {
3045
r = -EINVAL;
3046
if (kvm->arch.use_irqchip)
3047
r = 0;
3048
break;
3049
}
3050
case KVM_SET_DEVICE_ATTR: {
3051
r = -EFAULT;
3052
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3053
break;
3054
r = kvm_s390_vm_set_attr(kvm, &attr);
3055
break;
3056
}
3057
case KVM_GET_DEVICE_ATTR: {
3058
r = -EFAULT;
3059
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3060
break;
3061
r = kvm_s390_vm_get_attr(kvm, &attr);
3062
break;
3063
}
3064
case KVM_HAS_DEVICE_ATTR: {
3065
r = -EFAULT;
3066
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3067
break;
3068
r = kvm_s390_vm_has_attr(kvm, &attr);
3069
break;
3070
}
3071
case KVM_S390_GET_SKEYS: {
3072
struct kvm_s390_skeys args;
3073
3074
r = -EFAULT;
3075
if (copy_from_user(&args, argp,
3076
sizeof(struct kvm_s390_skeys)))
3077
break;
3078
r = kvm_s390_get_skeys(kvm, &args);
3079
break;
3080
}
3081
case KVM_S390_SET_SKEYS: {
3082
struct kvm_s390_skeys args;
3083
3084
r = -EFAULT;
3085
if (copy_from_user(&args, argp,
3086
sizeof(struct kvm_s390_skeys)))
3087
break;
3088
r = kvm_s390_set_skeys(kvm, &args);
3089
break;
3090
}
3091
case KVM_S390_GET_CMMA_BITS: {
3092
struct kvm_s390_cmma_log args;
3093
3094
r = -EFAULT;
3095
if (copy_from_user(&args, argp, sizeof(args)))
3096
break;
3097
mutex_lock(&kvm->slots_lock);
3098
r = kvm_s390_get_cmma_bits(kvm, &args);
3099
mutex_unlock(&kvm->slots_lock);
3100
if (!r) {
3101
r = copy_to_user(argp, &args, sizeof(args));
3102
if (r)
3103
r = -EFAULT;
3104
}
3105
break;
3106
}
3107
case KVM_S390_SET_CMMA_BITS: {
3108
struct kvm_s390_cmma_log args;
3109
3110
r = -EFAULT;
3111
if (copy_from_user(&args, argp, sizeof(args)))
3112
break;
3113
mutex_lock(&kvm->slots_lock);
3114
r = kvm_s390_set_cmma_bits(kvm, &args);
3115
mutex_unlock(&kvm->slots_lock);
3116
break;
3117
}
3118
case KVM_S390_PV_COMMAND: {
3119
struct kvm_pv_cmd args;
3120
3121
/* protvirt means user cpu state */
3122
kvm_s390_set_user_cpu_state_ctrl(kvm);
3123
r = 0;
3124
if (!is_prot_virt_host()) {
3125
r = -EINVAL;
3126
break;
3127
}
3128
if (copy_from_user(&args, argp, sizeof(args))) {
3129
r = -EFAULT;
3130
break;
3131
}
3132
if (args.flags) {
3133
r = -EINVAL;
3134
break;
3135
}
3136
/* must be called without kvm->lock */
3137
r = kvm_s390_handle_pv(kvm, &args);
3138
if (copy_to_user(argp, &args, sizeof(args))) {
3139
r = -EFAULT;
3140
break;
3141
}
3142
break;
3143
}
3144
case KVM_S390_MEM_OP: {
3145
struct kvm_s390_mem_op mem_op;
3146
3147
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3148
r = kvm_s390_vm_mem_op(kvm, &mem_op);
3149
else
3150
r = -EFAULT;
3151
break;
3152
}
3153
case KVM_S390_ZPCI_OP: {
3154
struct kvm_s390_zpci_op args;
3155
3156
r = -EINVAL;
3157
if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3158
break;
3159
if (copy_from_user(&args, argp, sizeof(args))) {
3160
r = -EFAULT;
3161
break;
3162
}
3163
r = kvm_s390_pci_zpci_op(kvm, &args);
3164
break;
3165
}
3166
default:
3167
r = -ENOTTY;
3168
}
3169
3170
return r;
3171
}
3172
3173
static int kvm_s390_apxa_installed(void)
3174
{
3175
struct ap_config_info info;
3176
3177
if (ap_instructions_available()) {
3178
if (ap_qci(&info) == 0)
3179
return info.apxa;
3180
}
3181
3182
return 0;
3183
}
3184
3185
/*
3186
* The format of the crypto control block (CRYCB) is specified in the 3 low
3187
* order bits of the CRYCB designation (CRYCBD) field as follows:
3188
* Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3189
* AP extended addressing (APXA) facility are installed.
3190
* Format 1: The APXA facility is not installed but the MSAX3 facility is.
3191
* Format 2: Both the APXA and MSAX3 facilities are installed
3192
*/
3193
static void kvm_s390_set_crycb_format(struct kvm *kvm)
3194
{
3195
kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
3196
3197
/* Clear the CRYCB format bits - i.e., set format 0 by default */
3198
kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3199
3200
/* Check whether MSAX3 is installed */
3201
if (!test_kvm_facility(kvm, 76))
3202
return;
3203
3204
if (kvm_s390_apxa_installed())
3205
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3206
else
3207
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3208
}
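/*
 * The resulting format, in table form:
 *
 *	MSAX3 (facility 76)	APXA	CRYCBD format
 *	not installed		-	CRYCB_FORMAT0
 *	installed		no	CRYCB_FORMAT1
 *	installed		yes	CRYCB_FORMAT2
 */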
3209
3210
/*
3211
* kvm_arch_crypto_set_masks
3212
*
3213
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
3214
* to be set.
3215
* @apm: the mask identifying the accessible AP adapters
3216
* @aqm: the mask identifying the accessible AP domains
3217
* @adm: the mask identifying the accessible AP control domains
3218
*
3219
* Set the masks that identify the adapters, domains and control domains to
3220
* which the KVM guest is granted access.
3221
*
3222
* Note: The kvm->lock mutex must be locked by the caller before invoking this
3223
* function.
3224
*/
3225
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3226
unsigned long *aqm, unsigned long *adm)
3227
{
3228
struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3229
3230
kvm_s390_vcpu_block_all(kvm);
3231
3232
switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3233
case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3234
memcpy(crycb->apcb1.apm, apm, 32);
3235
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3236
apm[0], apm[1], apm[2], apm[3]);
3237
memcpy(crycb->apcb1.aqm, aqm, 32);
3238
VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3239
aqm[0], aqm[1], aqm[2], aqm[3]);
3240
memcpy(crycb->apcb1.adm, adm, 32);
3241
VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3242
adm[0], adm[1], adm[2], adm[3]);
3243
break;
3244
case CRYCB_FORMAT1:
3245
case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3246
memcpy(crycb->apcb0.apm, apm, 8);
3247
memcpy(crycb->apcb0.aqm, aqm, 2);
3248
memcpy(crycb->apcb0.adm, adm, 2);
3249
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3250
apm[0], *((unsigned short *)aqm),
3251
*((unsigned short *)adm));
3252
break;
3253
default: /* Can not happen */
3254
break;
3255
}
3256
3257
/* recreate the shadow crycb for each vcpu */
3258
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3259
kvm_s390_vcpu_unblock_all(kvm);
3260
}
3261
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
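/*
 * Mask widths used above: with CRYCB_FORMAT2 the masks live in apcb1 and are
 * 256 bits (32 bytes) each, while formats 0 and 1 use apcb0, where the
 * adapter mask is 64 bits and the domain and control-domain masks are 16
 * bits each, which is why only 8, 2 and 2 bytes are copied in that case.
 */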
3262
3263
/*
3264
* kvm_arch_crypto_clear_masks
3265
*
3266
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
3267
* to be cleared.
3268
*
3269
* Clear the masks that identify the adapters, domains and control domains to
3270
* which the KVM guest is granted access.
3271
*
3272
* Note: The kvm->lock mutex must be locked by the caller before invoking this
3273
* function.
3274
*/
3275
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3276
{
3277
kvm_s390_vcpu_block_all(kvm);
3278
3279
memset(&kvm->arch.crypto.crycb->apcb0, 0,
3280
sizeof(kvm->arch.crypto.crycb->apcb0));
3281
memset(&kvm->arch.crypto.crycb->apcb1, 0,
3282
sizeof(kvm->arch.crypto.crycb->apcb1));
3283
3284
VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3285
/* recreate the shadow crycb for each vcpu */
3286
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3287
kvm_s390_vcpu_unblock_all(kvm);
3288
}
3289
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3290
3291
static u64 kvm_s390_get_initial_cpuid(void)
3292
{
3293
struct cpuid cpuid;
3294
3295
get_cpu_id(&cpuid);
3296
cpuid.version = 0xff;
3297
return *((u64 *) &cpuid);
3298
}
3299
3300
static void kvm_s390_crypto_init(struct kvm *kvm)
3301
{
3302
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3303
kvm_s390_set_crycb_format(kvm);
3304
init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3305
3306
if (!test_kvm_facility(kvm, 76))
3307
return;
3308
3309
/* Enable AES/DEA protected key functions by default */
3310
kvm->arch.crypto.aes_kw = 1;
3311
kvm->arch.crypto.dea_kw = 1;
3312
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3313
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3314
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3315
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3316
}
3317
3318
static void sca_dispose(struct kvm *kvm)
3319
{
3320
if (kvm->arch.use_esca)
3321
free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3322
else
3323
free_page((unsigned long)(kvm->arch.sca));
3324
kvm->arch.sca = NULL;
3325
}
3326
3327
void kvm_arch_free_vm(struct kvm *kvm)
3328
{
3329
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3330
kvm_s390_pci_clear_list(kvm);
3331
3332
__kvm_arch_free_vm(kvm);
3333
}
3334
3335
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3336
{
3337
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3338
int i, rc;
3339
char debug_name[16];
3340
static unsigned long sca_offset;
3341
3342
rc = -EINVAL;
3343
#ifdef CONFIG_KVM_S390_UCONTROL
3344
if (type & ~KVM_VM_S390_UCONTROL)
3345
goto out_err;
3346
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3347
goto out_err;
3348
#else
3349
if (type)
3350
goto out_err;
3351
#endif
3352
3353
rc = s390_enable_sie();
3354
if (rc)
3355
goto out_err;
3356
3357
rc = -ENOMEM;
3358
3359
if (!sclp.has_64bscao)
3360
alloc_flags |= GFP_DMA;
3361
rwlock_init(&kvm->arch.sca_lock);
3362
/* start with basic SCA */
3363
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3364
if (!kvm->arch.sca)
3365
goto out_err;
3366
mutex_lock(&kvm_lock);
3367
sca_offset += 16;
3368
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3369
sca_offset = 0;
3370
kvm->arch.sca = (struct bsca_block *)
3371
((char *) kvm->arch.sca + sca_offset);
3372
mutex_unlock(&kvm_lock);
3373
3374
sprintf(debug_name, "kvm-%u", current->pid);
3375
3376
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3377
if (!kvm->arch.dbf)
3378
goto out_err;
3379
3380
BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3381
kvm->arch.sie_page2 =
3382
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3383
if (!kvm->arch.sie_page2)
3384
goto out_err;
3385
3386
kvm->arch.sie_page2->kvm = kvm;
3387
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3388
3389
for (i = 0; i < kvm_s390_fac_size(); i++) {
3390
kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3391
(kvm_s390_fac_base[i] |
3392
kvm_s390_fac_ext[i]);
3393
kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3394
kvm_s390_fac_base[i];
3395
}
3396
kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3397
3398
/* we are always in czam mode - even on pre z14 machines */
3399
set_kvm_facility(kvm->arch.model.fac_mask, 138);
3400
set_kvm_facility(kvm->arch.model.fac_list, 138);
3401
/* we emulate STHYI in kvm */
3402
set_kvm_facility(kvm->arch.model.fac_mask, 74);
3403
set_kvm_facility(kvm->arch.model.fac_list, 74);
3404
if (machine_has_tlb_guest()) {
3405
set_kvm_facility(kvm->arch.model.fac_mask, 147);
3406
set_kvm_facility(kvm->arch.model.fac_list, 147);
3407
}
3408
3409
if (css_general_characteristics.aiv && test_facility(65))
3410
set_kvm_facility(kvm->arch.model.fac_mask, 65);
3411
3412
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3413
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3414
3415
kvm->arch.model.uv_feat_guest.feat = 0;
3416
3417
kvm_s390_crypto_init(kvm);
3418
3419
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3420
mutex_lock(&kvm->lock);
3421
kvm_s390_pci_init_list(kvm);
3422
kvm_s390_vcpu_pci_enable_interp(kvm);
3423
mutex_unlock(&kvm->lock);
3424
}
3425
3426
mutex_init(&kvm->arch.float_int.ais_lock);
3427
spin_lock_init(&kvm->arch.float_int.lock);
3428
for (i = 0; i < FIRQ_LIST_COUNT; i++)
3429
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3430
init_waitqueue_head(&kvm->arch.ipte_wq);
3431
mutex_init(&kvm->arch.ipte_mutex);
3432
3433
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3434
VM_EVENT(kvm, 3, "vm created with type %lu", type);
3435
3436
if (type & KVM_VM_S390_UCONTROL) {
3437
struct kvm_userspace_memory_region2 fake_memslot = {
3438
.slot = KVM_S390_UCONTROL_MEMSLOT,
3439
.guest_phys_addr = 0,
3440
.userspace_addr = 0,
3441
.memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
3442
.flags = 0,
3443
};
3444
3445
kvm->arch.gmap = NULL;
3446
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3447
/* one flat fake memslot covering the whole address-space */
3448
mutex_lock(&kvm->slots_lock);
3449
KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
3450
mutex_unlock(&kvm->slots_lock);
3451
} else {
3452
if (sclp.hamax == U64_MAX)
3453
kvm->arch.mem_limit = TASK_SIZE_MAX;
3454
else
3455
kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3456
sclp.hamax + 1);
3457
kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3458
if (!kvm->arch.gmap)
3459
goto out_err;
3460
kvm->arch.gmap->private = kvm;
3461
kvm->arch.gmap->pfault_enabled = 0;
3462
}
3463
3464
kvm->arch.use_pfmfi = sclp.has_pfmfi;
3465
kvm->arch.use_skf = sclp.has_skey;
3466
spin_lock_init(&kvm->arch.start_stop_lock);
3467
kvm_s390_vsie_init(kvm);
3468
if (use_gisa)
3469
kvm_s390_gisa_init(kvm);
3470
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3471
kvm->arch.pv.set_aside = NULL;
3472
KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
3473
3474
return 0;
3475
out_err:
3476
free_page((unsigned long)kvm->arch.sie_page2);
3477
debug_unregister(kvm->arch.dbf);
3478
sca_dispose(kvm);
3479
KVM_EVENT(3, "creation of vm failed: %d", rc);
3480
return rc;
3481
}
3482
3483
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3484
{
3485
u16 rc, rrc;
3486
3487
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3488
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3489
kvm_s390_clear_local_irqs(vcpu);
3490
kvm_clear_async_pf_completion_queue(vcpu);
3491
if (!kvm_is_ucontrol(vcpu->kvm))
3492
sca_del_vcpu(vcpu);
3493
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3494
3495
if (kvm_is_ucontrol(vcpu->kvm))
3496
gmap_remove(vcpu->arch.gmap);
3497
3498
if (vcpu->kvm->arch.use_cmma)
3499
kvm_s390_vcpu_unsetup_cmma(vcpu);
3500
/* We cannot hold the vcpu mutex here; we are already dying */
3501
if (kvm_s390_pv_cpu_get_handle(vcpu))
3502
kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3503
free_page((unsigned long)(vcpu->arch.sie_block));
3504
}
3505
3506
void kvm_arch_destroy_vm(struct kvm *kvm)
3507
{
3508
u16 rc, rrc;
3509
3510
kvm_destroy_vcpus(kvm);
3511
sca_dispose(kvm);
3512
kvm_s390_gisa_destroy(kvm);
3513
/*
3514
* We are already at the end of life and kvm->lock is not taken.
3515
* This is ok as the file descriptor is closed by now and nobody
3516
* can mess with the pv state.
3517
*/
3518
kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3519
/*
3520
* Remove the mmu notifier only when the whole KVM VM is torn down,
3521
* and only if one was registered to begin with. If the VM is
3522
* currently not protected, but has previously been protected,
3523
* then it's possible that the notifier is still registered.
3524
*/
3525
if (kvm->arch.pv.mmu_notifier.ops)
3526
mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3527
3528
debug_unregister(kvm->arch.dbf);
3529
free_page((unsigned long)kvm->arch.sie_page2);
3530
if (!kvm_is_ucontrol(kvm))
3531
gmap_remove(kvm->arch.gmap);
3532
kvm_s390_destroy_adapters(kvm);
3533
kvm_s390_clear_float_irqs(kvm);
3534
kvm_s390_vsie_destroy(kvm);
3535
KVM_EVENT(3, "vm 0x%p destroyed", kvm);
3536
}
3537
3538
/* Section: vcpu related */
3539
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3540
{
3541
vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3542
if (!vcpu->arch.gmap)
3543
return -ENOMEM;
3544
vcpu->arch.gmap->private = vcpu->kvm;
3545
3546
return 0;
3547
}
3548
3549
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3550
{
3551
if (!kvm_s390_use_sca_entries())
3552
return;
3553
read_lock(&vcpu->kvm->arch.sca_lock);
3554
if (vcpu->kvm->arch.use_esca) {
3555
struct esca_block *sca = vcpu->kvm->arch.sca;
3556
3557
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3558
sca->cpu[vcpu->vcpu_id].sda = 0;
3559
} else {
3560
struct bsca_block *sca = vcpu->kvm->arch.sca;
3561
3562
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3563
sca->cpu[vcpu->vcpu_id].sda = 0;
3564
}
3565
read_unlock(&vcpu->kvm->arch.sca_lock);
3566
}
3567
3568
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3569
{
3570
if (!kvm_s390_use_sca_entries()) {
3571
phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3572
3573
/* we still need the basic sca for the ipte control */
3574
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3575
vcpu->arch.sie_block->scaol = sca_phys;
3576
return;
3577
}
3578
read_lock(&vcpu->kvm->arch.sca_lock);
3579
if (vcpu->kvm->arch.use_esca) {
3580
struct esca_block *sca = vcpu->kvm->arch.sca;
3581
phys_addr_t sca_phys = virt_to_phys(sca);
3582
3583
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3584
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3585
vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3586
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3587
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3588
} else {
3589
struct bsca_block *sca = vcpu->kvm->arch.sca;
3590
phys_addr_t sca_phys = virt_to_phys(sca);
3591
3592
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3593
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3594
vcpu->arch.sie_block->scaol = sca_phys;
3595
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3596
}
3597
read_unlock(&vcpu->kvm->arch.sca_lock);
3598
}
3599
3600
/* Basic SCA to Extended SCA data copy routines */
3601
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3602
{
3603
d->sda = s->sda;
3604
d->sigp_ctrl.c = s->sigp_ctrl.c;
3605
d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3606
}
3607
3608
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3609
{
3610
int i;
3611
3612
d->ipte_control = s->ipte_control;
3613
d->mcn[0] = s->mcn;
3614
for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3615
sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3616
}
3617
3618
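/*
 * Replace the basic SCA by an extended SCA so that more than
 * KVM_S390_BSCA_CPU_SLOTS vCPUs can be added. All vCPUs are blocked
 * while their SIE blocks are rewired to the new SCA origin.
 */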
static int sca_switch_to_extended(struct kvm *kvm)
3619
{
3620
struct bsca_block *old_sca = kvm->arch.sca;
3621
struct esca_block *new_sca;
3622
struct kvm_vcpu *vcpu;
3623
unsigned long vcpu_idx;
3624
u32 scaol, scaoh;
3625
phys_addr_t new_sca_phys;
3626
3627
if (kvm->arch.use_esca)
3628
return 0;
3629
3630
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3631
if (!new_sca)
3632
return -ENOMEM;
3633
3634
new_sca_phys = virt_to_phys(new_sca);
3635
scaoh = new_sca_phys >> 32;
3636
scaol = new_sca_phys & ESCA_SCAOL_MASK;
3637
3638
kvm_s390_vcpu_block_all(kvm);
3639
write_lock(&kvm->arch.sca_lock);
3640
3641
sca_copy_b_to_e(new_sca, old_sca);
3642
3643
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3644
vcpu->arch.sie_block->scaoh = scaoh;
3645
vcpu->arch.sie_block->scaol = scaol;
3646
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3647
}
3648
kvm->arch.sca = new_sca;
3649
kvm->arch.use_esca = 1;
3650
3651
write_unlock(&kvm->arch.sca_lock);
3652
kvm_s390_vcpu_unblock_all(kvm);
3653
3654
free_page((unsigned long)old_sca);
3655
3656
VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
3657
old_sca, kvm->arch.sca);
3658
return 0;
3659
}
3660
3661
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3662
{
3663
int rc;
3664
3665
if (!kvm_s390_use_sca_entries()) {
3666
if (id < KVM_MAX_VCPUS)
3667
return true;
3668
return false;
3669
}
3670
if (id < KVM_S390_BSCA_CPU_SLOTS)
3671
return true;
3672
if (!sclp.has_esca || !sclp.has_64bscao)
3673
return false;
3674
3675
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3676
3677
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3678
}
3679
3680
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3681
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3682
{
3683
WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3684
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3685
vcpu->arch.cputm_start = get_tod_clock_fast();
3686
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3687
}
3688
3689
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3690
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3691
{
3692
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3693
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3694
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3695
vcpu->arch.cputm_start = 0;
3696
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3697
}
3698
3699
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3700
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3701
{
3702
WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3703
vcpu->arch.cputm_enabled = true;
3704
__start_cpu_timer_accounting(vcpu);
3705
}
3706
3707
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3708
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3709
{
3710
WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3711
__stop_cpu_timer_accounting(vcpu);
3712
vcpu->arch.cputm_enabled = false;
3713
}
3714
3715
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3716
{
3717
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3718
__enable_cpu_timer_accounting(vcpu);
3719
preempt_enable();
3720
}
3721
3722
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3723
{
3724
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3725
__disable_cpu_timer_accounting(vcpu);
3726
preempt_enable();
3727
}
3728
3729
/* set the cpu timer - may only be called from the VCPU thread itself */
3730
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3731
{
3732
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3733
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3734
if (vcpu->arch.cputm_enabled)
3735
vcpu->arch.cputm_start = get_tod_clock_fast();
3736
vcpu->arch.sie_block->cputm = cputm;
3737
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3738
preempt_enable();
3739
}
3740
3741
/* update and get the cpu timer - can also be called from other VCPU threads */
3742
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3743
{
3744
unsigned int seq;
3745
__u64 value;
3746
3747
if (unlikely(!vcpu->arch.cputm_enabled))
3748
return vcpu->arch.sie_block->cputm;
3749
3750
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3751
do {
3752
seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3753
/*
3754
* If the writer would ever execute a read in the critical
3755
* section, e.g. in irq context, we have a deadlock.
3756
*/
3757
WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3758
value = vcpu->arch.sie_block->cputm;
3759
/* if cputm_start is 0, accounting is being started/stopped */
3760
if (likely(vcpu->arch.cputm_start))
3761
value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3762
} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3763
preempt_enable();
3764
return value;
3765
}
3766
3767
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3768
{
3769
3770
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3771
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3772
__start_cpu_timer_accounting(vcpu);
3773
vcpu->cpu = cpu;
3774
}
3775
3776
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3777
{
3778
vcpu->cpu = -1;
3779
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3780
__stop_cpu_timer_accounting(vcpu);
3781
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3782
3783
}
3784
3785
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3786
{
3787
mutex_lock(&vcpu->kvm->lock);
3788
preempt_disable();
3789
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3790
vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3791
preempt_enable();
3792
mutex_unlock(&vcpu->kvm->lock);
3793
if (!kvm_is_ucontrol(vcpu->kvm)) {
3794
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3795
sca_add_vcpu(vcpu);
3796
}
3797
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3798
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3799
}
3800
3801
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3802
{
3803
if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3804
test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3805
return true;
3806
return false;
3807
}
3808
3809
static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3810
{
3811
/* At least one ECC subfunction must be present */
3812
return kvm_has_pckmo_subfunc(kvm, 32) ||
3813
kvm_has_pckmo_subfunc(kvm, 33) ||
3814
kvm_has_pckmo_subfunc(kvm, 34) ||
3815
kvm_has_pckmo_subfunc(kvm, 40) ||
3816
kvm_has_pckmo_subfunc(kvm, 41);
3817
3818
}
3819
3820
static bool kvm_has_pckmo_hmac(struct kvm *kvm)
3821
{
3822
/* At least one HMAC subfunction must be present */
3823
return kvm_has_pckmo_subfunc(kvm, 118) ||
3824
kvm_has_pckmo_subfunc(kvm, 122);
3825
}
3826
3827
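/*
 * Mirror the VM-wide crypto configuration into this vCPU's SIE block:
 * set the crypto control block descriptor, enable AP instruction
 * interpretation (ECA_APIE) if requested, and select the AES/DEA
 * protected-key wrapping and ECC/HMAC bits that were configured.
 */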
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3828
{
3829
/*
3830
* If the AP instructions are not being interpreted and the MSAX3
3831
* facility is not configured for the guest, there is nothing to set up.
3832
*/
3833
if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3834
return;
3835
3836
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3837
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3838
vcpu->arch.sie_block->eca &= ~ECA_APIE;
3839
vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
3840
3841
if (vcpu->kvm->arch.crypto.apie)
3842
vcpu->arch.sie_block->eca |= ECA_APIE;
3843
3844
/* Set up protected key support */
3845
if (vcpu->kvm->arch.crypto.aes_kw) {
3846
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3847
/* ecc/hmac is also wrapped with AES key */
3848
if (kvm_has_pckmo_ecc(vcpu->kvm))
3849
vcpu->arch.sie_block->ecd |= ECD_ECC;
3850
if (kvm_has_pckmo_hmac(vcpu->kvm))
3851
vcpu->arch.sie_block->ecd |= ECD_HMAC;
3852
}
3853
3854
if (vcpu->kvm->arch.crypto.dea_kw)
3855
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3856
}
3857
3858
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3859
{
3860
free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3861
vcpu->arch.sie_block->cbrlo = 0;
3862
}
3863
3864
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3865
{
3866
void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3867
3868
if (!cbrlo_page)
3869
return -ENOMEM;
3870
3871
vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3872
return 0;
3873
}
3874
3875
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3876
{
3877
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3878
3879
vcpu->arch.sie_block->ibc = model->ibc;
3880
if (test_kvm_facility(vcpu->kvm, 7))
3881
vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3882
}
3883
3884
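/*
 * Configure the SIE control block of a freshly created vCPU: initial
 * CPU flags, the execution-control bits derived from guest facilities
 * and SCLP features, CMMA state, the clock-comparator timer and, for a
 * protected VM, the corresponding ultravisor CPU.
 */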
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3885
{
3886
int rc = 0;
3887
u16 uvrc, uvrrc;
3888
3889
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3890
CPUSTAT_SM |
3891
CPUSTAT_STOPPED);
3892
3893
if (test_kvm_facility(vcpu->kvm, 78))
3894
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3895
else if (test_kvm_facility(vcpu->kvm, 8))
3896
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3897
3898
kvm_s390_vcpu_setup_model(vcpu);
3899
3900
/* pgste_set_pte has special handling for !machine_has_esop() */
3901
if (machine_has_esop())
3902
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3903
if (test_kvm_facility(vcpu->kvm, 9))
3904
vcpu->arch.sie_block->ecb |= ECB_SRSI;
3905
if (test_kvm_facility(vcpu->kvm, 11))
3906
vcpu->arch.sie_block->ecb |= ECB_PTF;
3907
if (test_kvm_facility(vcpu->kvm, 73))
3908
vcpu->arch.sie_block->ecb |= ECB_TE;
3909
if (!kvm_is_ucontrol(vcpu->kvm))
3910
vcpu->arch.sie_block->ecb |= ECB_SPECI;
3911
3912
if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3913
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3914
if (test_kvm_facility(vcpu->kvm, 130))
3915
vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3916
vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3917
if (sclp.has_cei)
3918
vcpu->arch.sie_block->eca |= ECA_CEI;
3919
if (sclp.has_ib)
3920
vcpu->arch.sie_block->eca |= ECA_IB;
3921
if (sclp.has_siif)
3922
vcpu->arch.sie_block->eca |= ECA_SII;
3923
if (sclp.has_sigpif)
3924
vcpu->arch.sie_block->eca |= ECA_SIGPI;
3925
if (test_kvm_facility(vcpu->kvm, 129)) {
3926
vcpu->arch.sie_block->eca |= ECA_VX;
3927
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3928
}
3929
if (test_kvm_facility(vcpu->kvm, 139))
3930
vcpu->arch.sie_block->ecd |= ECD_MEF;
3931
if (test_kvm_facility(vcpu->kvm, 156))
3932
vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3933
if (vcpu->arch.sie_block->gd) {
3934
vcpu->arch.sie_block->eca |= ECA_AIV;
3935
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3936
vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3937
}
3938
vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3939
vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3940
3941
if (sclp.has_kss)
3942
kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3943
else
3944
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3945
3946
if (vcpu->kvm->arch.use_cmma) {
3947
rc = kvm_s390_vcpu_setup_cmma(vcpu);
3948
if (rc)
3949
return rc;
3950
}
3951
hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
3952
HRTIMER_MODE_REL);
3953
3954
vcpu->arch.sie_block->hpid = HPID_KVM;
3955
3956
kvm_s390_vcpu_crypto_setup(vcpu);
3957
3958
kvm_s390_vcpu_pci_setup(vcpu);
3959
3960
mutex_lock(&vcpu->kvm->lock);
3961
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3962
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3963
if (rc)
3964
kvm_s390_vcpu_unsetup_cmma(vcpu);
3965
}
3966
mutex_unlock(&vcpu->kvm->lock);
3967
3968
return rc;
3969
}
3970
3971
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3972
{
3973
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3974
return -EINVAL;
3975
return 0;
3976
}
3977
3978
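/*
 * Allocate and initialize the SIE control block of a new vCPU and
 * advertise which register sets can be synchronized through kvm_run,
 * depending on the facilities available to the guest.
 */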
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3979
{
3980
struct sie_page *sie_page;
3981
int rc;
3982
3983
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3984
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3985
if (!sie_page)
3986
return -ENOMEM;
3987
3988
vcpu->arch.sie_block = &sie_page->sie_block;
3989
vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3990
3991
/* the real guest size will always be smaller than msl */
3992
vcpu->arch.sie_block->mso = 0;
3993
vcpu->arch.sie_block->msl = sclp.hamax;
3994
3995
vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3996
spin_lock_init(&vcpu->arch.local_int.lock);
3997
vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3998
seqcount_init(&vcpu->arch.cputm_seqcount);
3999
4000
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4001
kvm_clear_async_pf_completion_queue(vcpu);
4002
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
4003
KVM_SYNC_GPRS |
4004
KVM_SYNC_ACRS |
4005
KVM_SYNC_CRS |
4006
KVM_SYNC_ARCH0 |
4007
KVM_SYNC_PFAULT |
4008
KVM_SYNC_DIAG318;
4009
vcpu->arch.acrs_loaded = false;
4010
kvm_s390_set_prefix(vcpu, 0);
4011
if (test_kvm_facility(vcpu->kvm, 64))
4012
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
4013
if (test_kvm_facility(vcpu->kvm, 82))
4014
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
4015
if (test_kvm_facility(vcpu->kvm, 133))
4016
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
4017
if (test_kvm_facility(vcpu->kvm, 156))
4018
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
4019
/* fprs can be synchronized via vrs, even if the guest has no vx. With
4020
* cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
4021
*/
4022
if (cpu_has_vx())
4023
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
4024
else
4025
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
4026
4027
if (kvm_is_ucontrol(vcpu->kvm)) {
4028
rc = __kvm_ucontrol_vcpu_init(vcpu);
4029
if (rc)
4030
goto out_free_sie_block;
4031
}
4032
4033
VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
4034
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4035
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4036
4037
rc = kvm_s390_vcpu_setup(vcpu);
4038
if (rc)
4039
goto out_ucontrol_uninit;
4040
4041
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
4042
return 0;
4043
4044
out_ucontrol_uninit:
4045
if (kvm_is_ucontrol(vcpu->kvm))
4046
gmap_remove(vcpu->arch.gmap);
4047
out_free_sie_block:
4048
free_page((unsigned long)(vcpu->arch.sie_block));
4049
return rc;
4050
}
4051
4052
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4053
{
4054
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4055
return kvm_s390_vcpu_has_irq(vcpu, 0);
4056
}
4057
4058
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
4059
{
4060
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
4061
}
4062
4063
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
4064
{
4065
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4066
exit_sie(vcpu);
4067
}
4068
4069
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
4070
{
4071
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4072
}
4073
4074
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
4075
{
4076
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4077
exit_sie(vcpu);
4078
}
4079
4080
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
4081
{
4082
return atomic_read(&vcpu->arch.sie_block->prog20) &
4083
(PROG_BLOCK_SIE | PROG_REQUEST);
4084
}
4085
4086
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
4087
{
4088
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4089
}
4090
4091
/*
4092
* Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
4093
* If the CPU is not running (e.g. waiting as idle) the function will
4094
* return immediately. */
4095
void exit_sie(struct kvm_vcpu *vcpu)
4096
{
4097
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
4098
kvm_s390_vsie_kick(vcpu);
4099
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
4100
cpu_relax();
4101
}
4102
4103
/* Kick a guest cpu out of SIE to process a request synchronously */
4104
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
4105
{
4106
__kvm_make_request(req, vcpu);
4107
kvm_s390_vcpu_request(vcpu);
4108
}
4109
4110
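/*
 * gmap invalidation callback: if the invalidated range overlaps a
 * vCPU's prefix area (two pages), request that vCPU to re-map and
 * re-protect its prefix before the next SIE entry.
 */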
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
4111
unsigned long end)
4112
{
4113
struct kvm *kvm = gmap->private;
4114
struct kvm_vcpu *vcpu;
4115
unsigned long prefix;
4116
unsigned long i;
4117
4118
trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
4119
4120
if (gmap_is_shadow(gmap))
4121
return;
4122
if (start >= 1UL << 31)
4123
/* We are only interested in prefix pages */
4124
return;
4125
kvm_for_each_vcpu(i, vcpu, kvm) {
4126
/* match against both prefix pages */
4127
prefix = kvm_s390_get_prefix(vcpu);
4128
if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
4129
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
4130
start, end);
4131
kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4132
}
4133
}
4134
}
4135
4136
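/*
 * Skip halt polling when this CPU's average steal time exceeds
 * halt_poll_max_steal percent of the elapsed time; the host is already
 * overcommitted in that case.
 */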
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
4137
{
4138
/* do not poll with more than halt_poll_max_steal percent of steal time */
4139
if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
4140
READ_ONCE(halt_poll_max_steal)) {
4141
vcpu->stat.halt_no_poll_steal++;
4142
return true;
4143
}
4144
return false;
4145
}
4146
4147
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
4148
{
4149
/* kvm common code refers to this, but never calls it */
4150
BUG();
4151
return 0;
4152
}
4153
4154
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
4155
struct kvm_one_reg *reg)
4156
{
4157
int r = -EINVAL;
4158
4159
switch (reg->id) {
4160
case KVM_REG_S390_TODPR:
4161
r = put_user(vcpu->arch.sie_block->todpr,
4162
(u32 __user *)reg->addr);
4163
break;
4164
case KVM_REG_S390_EPOCHDIFF:
4165
r = put_user(vcpu->arch.sie_block->epoch,
4166
(u64 __user *)reg->addr);
4167
break;
4168
case KVM_REG_S390_CPU_TIMER:
4169
r = put_user(kvm_s390_get_cpu_timer(vcpu),
4170
(u64 __user *)reg->addr);
4171
break;
4172
case KVM_REG_S390_CLOCK_COMP:
4173
r = put_user(vcpu->arch.sie_block->ckc,
4174
(u64 __user *)reg->addr);
4175
break;
4176
case KVM_REG_S390_PFTOKEN:
4177
r = put_user(vcpu->arch.pfault_token,
4178
(u64 __user *)reg->addr);
4179
break;
4180
case KVM_REG_S390_PFCOMPARE:
4181
r = put_user(vcpu->arch.pfault_compare,
4182
(u64 __user *)reg->addr);
4183
break;
4184
case KVM_REG_S390_PFSELECT:
4185
r = put_user(vcpu->arch.pfault_select,
4186
(u64 __user *)reg->addr);
4187
break;
4188
case KVM_REG_S390_PP:
4189
r = put_user(vcpu->arch.sie_block->pp,
4190
(u64 __user *)reg->addr);
4191
break;
4192
case KVM_REG_S390_GBEA:
4193
r = put_user(vcpu->arch.sie_block->gbea,
4194
(u64 __user *)reg->addr);
4195
break;
4196
default:
4197
break;
4198
}
4199
4200
return r;
4201
}
4202
4203
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4204
struct kvm_one_reg *reg)
4205
{
4206
int r = -EINVAL;
4207
__u64 val;
4208
4209
switch (reg->id) {
4210
case KVM_REG_S390_TODPR:
4211
r = get_user(vcpu->arch.sie_block->todpr,
4212
(u32 __user *)reg->addr);
4213
break;
4214
case KVM_REG_S390_EPOCHDIFF:
4215
r = get_user(vcpu->arch.sie_block->epoch,
4216
(u64 __user *)reg->addr);
4217
break;
4218
case KVM_REG_S390_CPU_TIMER:
4219
r = get_user(val, (u64 __user *)reg->addr);
4220
if (!r)
4221
kvm_s390_set_cpu_timer(vcpu, val);
4222
break;
4223
case KVM_REG_S390_CLOCK_COMP:
4224
r = get_user(vcpu->arch.sie_block->ckc,
4225
(u64 __user *)reg->addr);
4226
break;
4227
case KVM_REG_S390_PFTOKEN:
4228
r = get_user(vcpu->arch.pfault_token,
4229
(u64 __user *)reg->addr);
4230
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4231
kvm_clear_async_pf_completion_queue(vcpu);
4232
break;
4233
case KVM_REG_S390_PFCOMPARE:
4234
r = get_user(vcpu->arch.pfault_compare,
4235
(u64 __user *)reg->addr);
4236
break;
4237
case KVM_REG_S390_PFSELECT:
4238
r = get_user(vcpu->arch.pfault_select,
4239
(u64 __user *)reg->addr);
4240
break;
4241
case KVM_REG_S390_PP:
4242
r = get_user(vcpu->arch.sie_block->pp,
4243
(u64 __user *)reg->addr);
4244
break;
4245
case KVM_REG_S390_GBEA:
4246
r = get_user(vcpu->arch.sie_block->gbea,
4247
(u64 __user *)reg->addr);
4248
break;
4249
default:
4250
break;
4251
}
4252
4253
return r;
4254
}
4255
4256
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4257
{
4258
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4259
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4260
memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4261
4262
kvm_clear_async_pf_completion_queue(vcpu);
4263
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4264
kvm_s390_vcpu_stop(vcpu);
4265
kvm_s390_clear_local_irqs(vcpu);
4266
}
4267
4268
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4269
{
4270
/* Initial reset is a superset of the normal reset */
4271
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4272
4273
/*
4274
* This equals the initial cpu reset in the POP, but we don't switch to ESA.
4275
* We not only reset the internal data, but also ...
4276
*/
4277
vcpu->arch.sie_block->gpsw.mask = 0;
4278
vcpu->arch.sie_block->gpsw.addr = 0;
4279
kvm_s390_set_prefix(vcpu, 0);
4280
kvm_s390_set_cpu_timer(vcpu, 0);
4281
vcpu->arch.sie_block->ckc = 0;
4282
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4283
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4284
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4285
4286
/* ... the data in sync regs */
4287
memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4288
vcpu->run->s.regs.ckc = 0;
4289
vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4290
vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4291
vcpu->run->psw_addr = 0;
4292
vcpu->run->psw_mask = 0;
4293
vcpu->run->s.regs.todpr = 0;
4294
vcpu->run->s.regs.cputm = 0;
4295
vcpu->run->s.regs.ckc = 0;
4296
vcpu->run->s.regs.pp = 0;
4297
vcpu->run->s.regs.gbea = 1;
4298
vcpu->run->s.regs.fpc = 0;
4299
/*
4300
* Do not reset these registers in the protected case, as some of
4301
* them are overlaid and they are not accessible in this case
4302
* anyway.
4303
*/
4304
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4305
vcpu->arch.sie_block->gbea = 1;
4306
vcpu->arch.sie_block->pp = 0;
4307
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4308
vcpu->arch.sie_block->todpr = 0;
4309
}
4310
}
4311
4312
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4313
{
4314
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4315
4316
/* Clear reset is a superset of the initial reset */
4317
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4318
4319
memset(&regs->gprs, 0, sizeof(regs->gprs));
4320
memset(&regs->vrs, 0, sizeof(regs->vrs));
4321
memset(&regs->acrs, 0, sizeof(regs->acrs));
4322
memset(&regs->gscb, 0, sizeof(regs->gscb));
4323
4324
regs->etoken = 0;
4325
regs->etoken_extension = 0;
4326
}
4327
4328
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4329
{
4330
vcpu_load(vcpu);
4331
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4332
vcpu_put(vcpu);
4333
return 0;
4334
}
4335
4336
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4337
{
4338
vcpu_load(vcpu);
4339
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4340
vcpu_put(vcpu);
4341
return 0;
4342
}
4343
4344
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4345
struct kvm_sregs *sregs)
4346
{
4347
vcpu_load(vcpu);
4348
4349
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4350
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4351
4352
vcpu_put(vcpu);
4353
return 0;
4354
}
4355
4356
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4357
struct kvm_sregs *sregs)
4358
{
4359
vcpu_load(vcpu);
4360
4361
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4362
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4363
4364
vcpu_put(vcpu);
4365
return 0;
4366
}
4367
4368
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4369
{
4370
int ret = 0;
4371
4372
vcpu_load(vcpu);
4373
4374
vcpu->run->s.regs.fpc = fpu->fpc;
4375
if (cpu_has_vx())
4376
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4377
(freg_t *) fpu->fprs);
4378
else
4379
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4380
4381
vcpu_put(vcpu);
4382
return ret;
4383
}
4384
4385
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4386
{
4387
vcpu_load(vcpu);
4388
4389
if (cpu_has_vx())
4390
convert_vx_to_fp((freg_t *) fpu->fprs,
4391
(__vector128 *) vcpu->run->s.regs.vrs);
4392
else
4393
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4394
fpu->fpc = vcpu->run->s.regs.fpc;
4395
4396
vcpu_put(vcpu);
4397
return 0;
4398
}
4399
4400
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4401
{
4402
int rc = 0;
4403
4404
if (!is_vcpu_stopped(vcpu))
4405
rc = -EBUSY;
4406
else {
4407
vcpu->run->psw_mask = psw.mask;
4408
vcpu->run->psw_addr = psw.addr;
4409
}
4410
return rc;
4411
}
4412
4413
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4414
struct kvm_translation *tr)
4415
{
4416
return -EINVAL; /* not implemented yet */
4417
}
4418
4419
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4420
KVM_GUESTDBG_USE_HW_BP | \
4421
KVM_GUESTDBG_ENABLE)
4422
4423
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4424
struct kvm_guest_debug *dbg)
4425
{
4426
int rc = 0;
4427
4428
vcpu_load(vcpu);
4429
4430
vcpu->guest_debug = 0;
4431
kvm_s390_clear_bp_data(vcpu);
4432
4433
if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4434
rc = -EINVAL;
4435
goto out;
4436
}
4437
if (!sclp.has_gpere) {
4438
rc = -EINVAL;
4439
goto out;
4440
}
4441
4442
if (dbg->control & KVM_GUESTDBG_ENABLE) {
4443
vcpu->guest_debug = dbg->control;
4444
/* enforce guest PER */
4445
kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4446
4447
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4448
rc = kvm_s390_import_bp_data(vcpu, dbg);
4449
} else {
4450
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4451
vcpu->arch.guestdbg.last_bp = 0;
4452
}
4453
4454
if (rc) {
4455
vcpu->guest_debug = 0;
4456
kvm_s390_clear_bp_data(vcpu);
4457
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4458
}
4459
4460
out:
4461
vcpu_put(vcpu);
4462
return rc;
4463
}
4464
4465
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4466
struct kvm_mp_state *mp_state)
4467
{
4468
int ret;
4469
4470
vcpu_load(vcpu);
4471
4472
/* CHECK_STOP and LOAD are not supported yet */
4473
ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4474
KVM_MP_STATE_OPERATING;
4475
4476
vcpu_put(vcpu);
4477
return ret;
4478
}
4479
4480
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4481
struct kvm_mp_state *mp_state)
4482
{
4483
int rc = 0;
4484
4485
vcpu_load(vcpu);
4486
4487
/* user space knows about this interface - let it control the state */
4488
kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4489
4490
switch (mp_state->mp_state) {
4491
case KVM_MP_STATE_STOPPED:
4492
rc = kvm_s390_vcpu_stop(vcpu);
4493
break;
4494
case KVM_MP_STATE_OPERATING:
4495
rc = kvm_s390_vcpu_start(vcpu);
4496
break;
4497
case KVM_MP_STATE_LOAD:
4498
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4499
rc = -ENXIO;
4500
break;
4501
}
4502
rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4503
break;
4504
case KVM_MP_STATE_CHECK_STOP:
4505
fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4506
default:
4507
rc = -ENXIO;
4508
}
4509
4510
vcpu_put(vcpu);
4511
return rc;
4512
}
4513
4514
static bool ibs_enabled(struct kvm_vcpu *vcpu)
4515
{
4516
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4517
}
4518
4519
static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
4520
{
4521
struct kvm *kvm = gmap->private;
4522
gfn_t gfn = gpa_to_gfn(gaddr);
4523
bool unlocked;
4524
hva_t vmaddr;
4525
gpa_t tmp;
4526
int rc;
4527
4528
if (kvm_is_ucontrol(kvm)) {
4529
tmp = __gmap_translate(gmap, gaddr);
4530
gfn = gpa_to_gfn(tmp);
4531
}
4532
4533
vmaddr = gfn_to_hva(kvm, gfn);
4534
rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
4535
if (!rc)
4536
rc = __gmap_link(gmap, gaddr, vmaddr);
4537
return rc;
4538
}
4539
4540
/**
4541
* __kvm_s390_mprotect_many() - Apply specified protection to guest pages
4542
* @gmap: the gmap of the guest
4543
* @gpa: the starting guest address
4544
* @npages: how many pages to protect
4545
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
4546
* @bits: pgste notification bits to set
4547
*
4548
* Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
4549
*
4550
* Context: kvm->srcu and gmap->mm need to be held in read mode
4551
*/
4552
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
4553
unsigned long bits)
4554
{
4555
unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
4556
gpa_t end = gpa + npages * PAGE_SIZE;
4557
int rc;
4558
4559
for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
4560
rc = gmap_protect_one(gmap, gpa, prot, bits);
4561
if (rc == -EAGAIN) {
4562
__kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
4563
rc = gmap_protect_one(gmap, gpa, prot, bits);
4564
}
4565
if (rc < 0)
4566
return rc;
4567
}
4568
4569
return 0;
4570
}
4571
4572
static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
4573
{
4574
gpa_t gaddr = kvm_s390_get_prefix(vcpu);
4575
int idx, rc;
4576
4577
idx = srcu_read_lock(&vcpu->kvm->srcu);
4578
mmap_read_lock(vcpu->arch.gmap->mm);
4579
4580
rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
4581
4582
mmap_read_unlock(vcpu->arch.gmap->mm);
4583
srcu_read_unlock(&vcpu->kvm->srcu, idx);
4584
4585
return rc;
4586
}
4587
4588
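/*
 * Process the requests posted against this vCPU before it re-enters
 * SIE: re-protect the prefix pages, flush the TLB, toggle IBS and
 * switch CMM interpretation off/on around live migration.
 */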
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4589
{
4590
retry:
4591
kvm_s390_vcpu_request_handled(vcpu);
4592
if (!kvm_request_pending(vcpu))
4593
return 0;
4594
/*
4595
* If the guest prefix changed, re-arm the ipte notifier for the
4596
* guest prefix page. kvm_s390_mprotect_notify_prefix() will wait on the ptl lock.
4597
* This ensures that the ipte instruction for this request has
4598
* already finished. We might race against a second unmapper that
4599
* wants to set the blocking bit. Let's just retry the request loop.
4600
*/
4601
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4602
int rc;
4603
4604
rc = kvm_s390_mprotect_notify_prefix(vcpu);
4605
if (rc) {
4606
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4607
return rc;
4608
}
4609
goto retry;
4610
}
4611
4612
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4613
vcpu->arch.sie_block->ihcpu = 0xffff;
4614
goto retry;
4615
}
4616
4617
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4618
if (!ibs_enabled(vcpu)) {
4619
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4620
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4621
}
4622
goto retry;
4623
}
4624
4625
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4626
if (ibs_enabled(vcpu)) {
4627
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4628
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4629
}
4630
goto retry;
4631
}
4632
4633
if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4634
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4635
goto retry;
4636
}
4637
4638
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4639
/*
4640
* Disable CMM virtualization; we will emulate the ESSA
4641
* instruction manually, in order to provide additional
4642
* functionalities needed for live migration.
4643
*/
4644
vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4645
goto retry;
4646
}
4647
4648
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4649
/*
4650
* Re-enable CMM virtualization if CMMA is available and
4651
* CMM has been used.
4652
*/
4653
if ((vcpu->kvm->arch.use_cmma) &&
4654
(vcpu->kvm->mm->context.uses_cmm))
4655
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4656
goto retry;
4657
}
4658
4659
/* we left the vsie handler, nothing to do, just clear the request */
4660
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4661
4662
return 0;
4663
}
4664
4665
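/*
 * Recompute the guest TOD epoch (and the epoch index if the
 * multiple-epoch facility is available) relative to the current host
 * TOD clock, then push the new values into all SIE blocks while every
 * vCPU is blocked.
 */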
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4666
{
4667
struct kvm_vcpu *vcpu;
4668
union tod_clock clk;
4669
unsigned long i;
4670
4671
preempt_disable();
4672
4673
store_tod_clock_ext(&clk);
4674
4675
kvm->arch.epoch = gtod->tod - clk.tod;
4676
kvm->arch.epdx = 0;
4677
if (test_kvm_facility(kvm, 139)) {
4678
kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4679
if (kvm->arch.epoch > gtod->tod)
4680
kvm->arch.epdx -= 1;
4681
}
4682
4683
kvm_s390_vcpu_block_all(kvm);
4684
kvm_for_each_vcpu(i, vcpu, kvm) {
4685
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4686
vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4687
}
4688
4689
kvm_s390_vcpu_unblock_all(kvm);
4690
preempt_enable();
4691
}
4692
4693
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4694
{
4695
if (!mutex_trylock(&kvm->lock))
4696
return 0;
4697
__kvm_s390_set_tod_clock(kvm, gtod);
4698
mutex_unlock(&kvm->lock);
4699
return 1;
4700
}
4701
4702
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4703
unsigned long token)
4704
{
4705
struct kvm_s390_interrupt inti;
4706
struct kvm_s390_irq irq;
4707
4708
if (start_token) {
4709
irq.u.ext.ext_params2 = token;
4710
irq.type = KVM_S390_INT_PFAULT_INIT;
4711
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4712
} else {
4713
inti.type = KVM_S390_INT_PFAULT_DONE;
4714
inti.parm64 = token;
4715
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4716
}
4717
}
4718
4719
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4720
struct kvm_async_pf *work)
4721
{
4722
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4723
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4724
4725
return true;
4726
}
4727
4728
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4729
struct kvm_async_pf *work)
4730
{
4731
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4732
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4733
}
4734
4735
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4736
struct kvm_async_pf *work)
4737
{
4738
/* s390 will always inject the page directly */
4739
}
4740
4741
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4742
{
4743
/*
4744
* s390 will always inject the page directly,
4745
* but we still want check_async_completion to clean up
4746
*/
4747
return true;
4748
}
4749
4750
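/*
 * Try to convert the current host fault into an asynchronous pfault:
 * only possible if the guest has pfault enabled, the PSW/CR0 state
 * allows the notification interrupts and no other interrupt is
 * already pending.
 */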
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4751
{
4752
hva_t hva;
4753
struct kvm_arch_async_pf arch;
4754
4755
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4756
return false;
4757
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4758
vcpu->arch.pfault_compare)
4759
return false;
4760
if (psw_extint_disabled(vcpu))
4761
return false;
4762
if (kvm_s390_vcpu_has_irq(vcpu, 0))
4763
return false;
4764
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4765
return false;
4766
if (!vcpu->arch.gmap->pfault_enabled)
4767
return false;
4768
4769
hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
4770
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4771
return false;
4772
4773
return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
4774
}
4775
4776
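/*
 * Per-iteration preparation before (re)entering SIE: complete finished
 * pfaults, deliver pending interrupts, handle posted requests and arm
 * the guest-debug PER shadowing if enabled.
 */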
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4777
{
4778
int rc, cpuflags;
4779
4780
/*
4781
* On s390 notifications for arriving pages will be delivered directly
4782
* to the guest but the housekeeping for completed pfaults is
4783
* handled outside the worker.
4784
*/
4785
kvm_check_async_pf_completion(vcpu);
4786
4787
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4788
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4789
4790
if (need_resched())
4791
schedule();
4792
4793
if (!kvm_is_ucontrol(vcpu->kvm)) {
4794
rc = kvm_s390_deliver_pending_interrupts(vcpu);
4795
if (rc || guestdbg_exit_pending(vcpu))
4796
return rc;
4797
}
4798
4799
rc = kvm_s390_handle_requests(vcpu);
4800
if (rc)
4801
return rc;
4802
4803
if (guestdbg_enabled(vcpu)) {
4804
kvm_s390_backup_guest_per_regs(vcpu);
4805
kvm_s390_patch_guest_per_regs(vcpu);
4806
}
4807
4808
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4809
4810
vcpu->arch.sie_block->icptcode = 0;
4811
current->thread.gmap_int_code = 0;
4812
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4813
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4814
trace_kvm_s390_sie_enter(vcpu, cpuflags);
4815
4816
return 0;
4817
}
4818
4819
static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
4820
{
4821
struct kvm_s390_pgm_info pgm_info = {
4822
.code = PGM_ADDRESSING,
4823
};
4824
u8 opcode, ilen;
4825
int rc;
4826
4827
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4828
trace_kvm_s390_sie_fault(vcpu);
4829
4830
/*
4831
* We want to inject an addressing exception, which is defined as a
4832
* suppressing or terminating exception. However, since we came here
4833
* by a DAT access exception, the PSW still points to the faulting
4834
* instruction since DAT exceptions are nullifying. So we've got
4835
* to look up the current opcode to get the length of the instruction
4836
* to be able to forward the PSW.
4837
*/
4838
rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4839
ilen = insn_length(opcode);
4840
if (rc < 0) {
4841
return rc;
4842
} else if (rc) {
4843
/* Instruction-Fetching Exceptions - we can't detect the ilen.
4844
* Forward by arbitrary ilc, injection will take care of
4845
* nullification if necessary.
4846
*/
4847
pgm_info = vcpu->arch.pgm;
4848
ilen = 4;
4849
}
4850
pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4851
kvm_s390_forward_psw(vcpu, ilen);
4852
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4853
}
4854
4855
static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
4856
{
4857
KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4858
"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4859
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4860
}
4861
4862
/*
4863
* __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
4864
* @vcpu: the vCPU whose gmap is to be fixed up
4865
* @gfn: the guest frame number used for memslots (including fake memslots)
4866
* @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
4867
* @flags: FOLL_* flags
4868
*
4869
* Return: 0 on success, < 0 in case of error.
4870
* Context: The mm lock must not be held before calling. May sleep.
4871
*/
4872
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
4873
{
4874
struct kvm_memory_slot *slot;
4875
unsigned int fault_flags;
4876
bool writable, unlocked;
4877
unsigned long vmaddr;
4878
struct page *page;
4879
kvm_pfn_t pfn;
4880
int rc;
4881
4882
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
4883
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
4884
return vcpu_post_run_addressing_exception(vcpu);
4885
4886
fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
4887
if (vcpu->arch.gmap->pfault_enabled)
4888
flags |= FOLL_NOWAIT;
4889
vmaddr = __gfn_to_hva_memslot(slot, gfn);
4890
4891
try_again:
4892
pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
4893
4894
/* Access outside memory, inject addressing exception */
4895
if (is_noslot_pfn(pfn))
4896
return vcpu_post_run_addressing_exception(vcpu);
4897
/* Signal pending: try again */
4898
if (pfn == KVM_PFN_ERR_SIGPENDING)
4899
return -EAGAIN;
4900
4901
/* Needs I/O, try to set up an async pfault (only possible with FOLL_NOWAIT) */
4902
if (pfn == KVM_PFN_ERR_NEEDS_IO) {
4903
trace_kvm_s390_major_guest_pfault(vcpu);
4904
if (kvm_arch_setup_async_pf(vcpu))
4905
return 0;
4906
vcpu->stat.pfault_sync++;
4907
/* Could not set up an async pfault, try again synchronously */
4908
flags &= ~FOLL_NOWAIT;
4909
goto try_again;
4910
}
4911
/* Any other error */
4912
if (is_error_pfn(pfn))
4913
return -EFAULT;
4914
4915
/* Success */
4916
mmap_read_lock(vcpu->arch.gmap->mm);
4917
/* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
4918
rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
4919
if (!rc)
4920
rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
4921
scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
4922
kvm_release_faultin_page(vcpu->kvm, page, false, writable);
4923
}
4924
mmap_read_unlock(vcpu->arch.gmap->mm);
4925
return rc;
4926
}
4927
4928
static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
4929
{
4930
unsigned long gaddr_tmp;
4931
gfn_t gfn;
4932
4933
gfn = gpa_to_gfn(gaddr);
4934
if (kvm_is_ucontrol(vcpu->kvm)) {
4935
/*
4936
* This translates the per-vCPU guest address into a
4937
* fake guest address, which can then be used with the
4938
* fake memslots that are identity mapping userspace.
4939
* This allows ucontrol VMs to use the normal fault
4940
* resolution path, like normal VMs.
4941
*/
4942
mmap_read_lock(vcpu->arch.gmap->mm);
4943
gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
4944
mmap_read_unlock(vcpu->arch.gmap->mm);
4945
if (gaddr_tmp == -EFAULT) {
4946
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4947
vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4948
vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
4949
return -EREMOTE;
4950
}
4951
gfn = gpa_to_gfn(gaddr_tmp);
4952
}
4953
return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
4954
}
4955
4956
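/*
 * Dispatch on the program interruption code left behind by SIE:
 * secure-storage faults are resolved via the ultravisor import/destroy
 * helpers, ordinary DAT faults go through the common fault resolution
 * path, anything unexpected is a bug.
 */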
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4957
{
4958
unsigned int flags = 0;
4959
unsigned long gaddr;
4960
int rc;
4961
4962
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4963
if (kvm_s390_cur_gmap_fault_is_write())
4964
flags = FAULT_FLAG_WRITE;
4965
4966
switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
4967
case 0:
4968
vcpu->stat.exit_null++;
4969
break;
4970
case PGM_SECURE_STORAGE_ACCESS:
4971
case PGM_SECURE_STORAGE_VIOLATION:
4972
kvm_s390_assert_primary_as(vcpu);
4973
/*
4974
* This can happen after a reboot with asynchronous teardown;
4975
* the new guest (normal or protected) will run on top of the
4976
* previous protected guest. The old pages need to be destroyed
4977
* so the new guest can use them.
4978
*/
4979
if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
4980
/*
4981
* Either KVM messed up the secure guest mapping or the
4982
* same page is mapped into multiple secure guests.
4983
*
4984
* This exception is only triggered when a guest 2 is
4985
* running and can therefore never occur in kernel
4986
* context.
4987
*/
4988
pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4989
current->thread.gmap_int_code, current->comm,
4990
current->pid);
4991
send_sig(SIGSEGV, current, 0);
4992
}
4993
break;
4994
case PGM_NON_SECURE_STORAGE_ACCESS:
4995
kvm_s390_assert_primary_as(vcpu);
4996
/*
4997
* This is normal operation; a page belonging to a protected
4998
* guest has not been imported yet. Try to import the page into
4999
* the protected guest.
5000
*/
5001
rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
5002
if (rc == -EINVAL)
5003
send_sig(SIGSEGV, current, 0);
5004
if (rc != -ENXIO)
5005
break;
5006
flags = FAULT_FLAG_WRITE;
5007
fallthrough;
5008
case PGM_PROTECTION:
5009
case PGM_SEGMENT_TRANSLATION:
5010
case PGM_PAGE_TRANSLATION:
5011
case PGM_ASCE_TYPE:
5012
case PGM_REGION_FIRST_TRANS:
5013
case PGM_REGION_SECOND_TRANS:
5014
case PGM_REGION_THIRD_TRANS:
5015
kvm_s390_assert_primary_as(vcpu);
5016
return vcpu_dat_fault_handler(vcpu, gaddr, flags);
5017
default:
5018
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
5019
current->thread.gmap_int_code, current->thread.gmap_teid.val);
5020
send_sig(SIGSEGV, current, 0);
5021
break;
5022
}
5023
return 0;
5024
}
5025
5026
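/*
 * Post-process a SIE exit: reinject machine checks signalled by
 * -EINTR, hand non-zero intercept codes to the intercept handlers and
 * fall back to host fault handling otherwise.
 */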
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
5027
{
5028
struct mcck_volatile_info *mcck_info;
5029
struct sie_page *sie_page;
5030
int rc;
5031
5032
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
5033
vcpu->arch.sie_block->icptcode);
5034
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
5035
5036
if (guestdbg_enabled(vcpu))
5037
kvm_s390_restore_guest_per_regs(vcpu);
5038
5039
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
5040
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
5041
5042
if (exit_reason == -EINTR) {
5043
VCPU_EVENT(vcpu, 3, "%s", "machine check");
5044
sie_page = container_of(vcpu->arch.sie_block,
5045
struct sie_page, sie_block);
5046
mcck_info = &sie_page->mcck_info;
5047
kvm_s390_reinject_machine_check(vcpu, mcck_info);
5048
return 0;
5049
}
5050
5051
if (vcpu->arch.sie_block->icptcode > 0) {
5052
rc = kvm_handle_sie_intercept(vcpu);
5053
5054
if (rc != -EOPNOTSUPP)
5055
return rc;
5056
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
5057
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
5058
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
5059
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
5060
return -EREMOTE;
5061
}
5062
5063
return vcpu_post_run_handle_fault(vcpu);
5064
}
5065
5066
int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
5067
u64 *gprs, unsigned long gasce)
5068
{
5069
int ret;
5070
5071
guest_state_enter_irqoff();
5072
5073
/*
5074
* The guest_state_{enter,exit}_irqoff() functions inform lockdep and
5075
* tracing that entry to the guest will enable host IRQs, and exit from
5076
* the guest will disable host IRQs.
5077
*
5078
* We must not use lockdep/tracing/RCU in this critical section, so we
5079
* use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
5080
*/
5081
arch_local_irq_enable();
5082
ret = sie64a(scb, gprs, gasce);
5083
arch_local_irq_disable();
5084
5085
guest_state_exit_irqoff();
5086
5087
return ret;
5088
}
5089
5090
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
5091
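/*
 * The inner run loop: prepare the vCPU, enter SIE with guest timing
 * accounting, and post-process each exit until an error, a pending
 * signal or a guest-debug exit ends the loop.
 */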
static int __vcpu_run(struct kvm_vcpu *vcpu)
5092
{
5093
int rc, exit_reason;
5094
struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
5095
5096
/*
5097
* We try to hold kvm->srcu during most of vcpu_run (except when run-
5098
* ning the guest), so that memslots (and other stuff) are protected
5099
*/
5100
kvm_vcpu_srcu_read_lock(vcpu);
5101
5102
do {
5103
rc = vcpu_pre_run(vcpu);
5104
if (rc || guestdbg_exit_pending(vcpu))
5105
break;
5106
5107
kvm_vcpu_srcu_read_unlock(vcpu);
5108
/*
5109
* As PF_VCPU will be used in the fault handler, there must be no
5110
* uaccess between guest_timing_enter_irqoff and
5111
* guest_timing_exit_irqoff.
5112
*/
5113
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5114
memcpy(sie_page->pv_grregs,
5115
vcpu->run->s.regs.gprs,
5116
sizeof(sie_page->pv_grregs));
5117
}
5118
5119
local_irq_disable();
5120
guest_timing_enter_irqoff();
5121
__disable_cpu_timer_accounting(vcpu);
5122
5123
exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
5124
vcpu->run->s.regs.gprs,
5125
vcpu->arch.gmap->asce);
5126
5127
__enable_cpu_timer_accounting(vcpu);
5128
guest_timing_exit_irqoff();
5129
local_irq_enable();
5130
5131
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5132
memcpy(vcpu->run->s.regs.gprs,
5133
sie_page->pv_grregs,
5134
sizeof(sie_page->pv_grregs));
5135
/*
5136
* We're not allowed to inject interrupts on intercepts
5137
* that leave the guest state in an "in-between" state
5138
* where the next SIE entry will do a continuation.
5139
* Fence interrupts in our "internal" PSW.
5140
*/
5141
if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
5142
vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
5143
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5144
}
5145
}
5146
kvm_vcpu_srcu_read_lock(vcpu);
5147
5148
rc = vcpu_post_run(vcpu, exit_reason);
5149
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
5150
5151
kvm_vcpu_srcu_read_unlock(vcpu);
5152
return rc;
5153
}
5154
5155
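/*
 * Copy format-2 (non-protected) state from kvm_run into the SIE block,
 * lazily enabling runtime instrumentation and guarded storage when
 * userspace hands over valid control blocks.
 */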
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
5156
{
5157
struct kvm_run *kvm_run = vcpu->run;
5158
struct runtime_instr_cb *riccb;
5159
struct gs_cb *gscb;
5160
5161
riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
5162
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
5163
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
5164
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
5165
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5166
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
5167
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
5168
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
5169
}
5170
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
5171
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
5172
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
5173
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
5174
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
5175
kvm_clear_async_pf_completion_queue(vcpu);
5176
}
5177
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
5178
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
5179
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
5180
VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
5181
}
5182
/*
5183
* If userspace sets the riccb (e.g. after migration) to a valid state,
5184
* we should enable RI here instead of doing the lazy enablement.
5185
*/
5186
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
5187
test_kvm_facility(vcpu->kvm, 64) &&
5188
riccb->v &&
5189
!(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
5190
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
5191
vcpu->arch.sie_block->ecb3 |= ECB3_RI;
5192
}
5193
/*
5194
* If userspace sets the gscb (e.g. after migration) to non-zero,
5195
* we should enable GS here instead of doing the lazy enablement.
5196
*/
5197
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
5198
test_kvm_facility(vcpu->kvm, 133) &&
5199
gscb->gssm &&
5200
!vcpu->arch.gs_enabled) {
5201
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
5202
vcpu->arch.sie_block->ecb |= ECB_GS;
5203
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
5204
vcpu->arch.gs_enabled = 1;
5205
}
5206
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
5207
test_kvm_facility(vcpu->kvm, 82)) {
5208
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
5209
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
5210
}
5211
if (cpu_has_gs()) {
5212
preempt_disable();
5213
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5214
if (current->thread.gs_cb) {
5215
vcpu->arch.host_gscb = current->thread.gs_cb;
5216
save_gs_cb(vcpu->arch.host_gscb);
5217
}
5218
if (vcpu->arch.gs_enabled) {
5219
current->thread.gs_cb = (struct gs_cb *)
5220
&vcpu->run->s.regs.gscb;
5221
restore_gs_cb(current->thread.gs_cb);
5222
}
5223
preempt_enable();
5224
}
5225
/* SIE will load etoken directly from SDNX and therefore kvm_run */
5226
}
5227
5228
static void sync_regs(struct kvm_vcpu *vcpu)
5229
{
5230
struct kvm_run *kvm_run = vcpu->run;
5231
5232
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
5233
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
5234
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
5235
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
5236
/* some control register changes require a tlb flush */
5237
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5238
}
5239
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5240
kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
5241
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
5242
}
5243
save_access_regs(vcpu->arch.host_acrs);
5244
restore_access_regs(vcpu->run->s.regs.acrs);
5245
vcpu->arch.acrs_loaded = true;
5246
kvm_s390_fpu_load(vcpu->run);
5247
/* Sync fmt2 only data */
5248
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
5249
sync_regs_fmt2(vcpu);
5250
} else {
5251
/*
5252
* In several places we have to modify our internal view to
5253
* not do things that are disallowed by the ultravisor. For
5254
* example we must not inject interrupts after specific exits
5255
* (e.g. 112 prefix page not secure). We do this by turning
5256
* off the machine check, external and I/O interrupt bits
5257
* of our PSW copy. To avoid getting validity intercepts, we
5258
* only accept the condition code from userspace.
5259
*/
5260
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
5261
vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
5262
PSW_MASK_CC;
5263
}
5264
5265
kvm_run->kvm_dirty_regs = 0;
5266
}
5267
5268
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
5269
{
5270
struct kvm_run *kvm_run = vcpu->run;
5271
5272
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
5273
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
5274
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
5275
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
5276
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
5277
if (cpu_has_gs()) {
5278
preempt_disable();
5279
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5280
if (vcpu->arch.gs_enabled)
5281
save_gs_cb(current->thread.gs_cb);
5282
current->thread.gs_cb = vcpu->arch.host_gscb;
5283
restore_gs_cb(vcpu->arch.host_gscb);
5284
if (!vcpu->arch.host_gscb)
5285
local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
5286
vcpu->arch.host_gscb = NULL;
5287
preempt_enable();
5288
}
5289
/* SIE will save etoken directly into SDNX and therefore kvm_run */
5290
}
5291
5292
static void store_regs(struct kvm_vcpu *vcpu)
5293
{
5294
struct kvm_run *kvm_run = vcpu->run;
5295
5296
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
5297
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
5298
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
5299
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
5300
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
5301
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
5302
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
5303
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
5304
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
5305
save_access_regs(vcpu->run->s.regs.acrs);
5306
restore_access_regs(vcpu->arch.host_acrs);
5307
vcpu->arch.acrs_loaded = false;
5308
kvm_s390_fpu_store(vcpu->run);
5309
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
5310
store_regs_fmt2(vcpu);
5311
}
5312
5313
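/*
 * Back end of the KVM_RUN ioctl: synchronize the register state from
 * kvm_run, run the vCPU until userspace interaction is required, then
 * store the state back into kvm_run.
 */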
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
5314
{
5315
struct kvm_run *kvm_run = vcpu->run;
5316
DECLARE_KERNEL_FPU_ONSTACK32(fpu);
5317
int rc;
5318
5319
/*
5320
* Running a VM while dumping always has the potential to
5321
* produce inconsistent dump data. But for PV vcpus a SIE
5322
* entry while dumping could also lead to a fatal validity
5323
* intercept which we absolutely want to avoid.
5324
*/
5325
if (vcpu->kvm->arch.pv.dumping)
5326
return -EINVAL;
5327
5328
if (!vcpu->wants_to_run)
5329
return -EINTR;
5330
5331
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
5332
kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
5333
return -EINVAL;
5334
5335
vcpu_load(vcpu);
5336
5337
if (guestdbg_exit_pending(vcpu)) {
5338
kvm_s390_prepare_debug_exit(vcpu);
5339
rc = 0;
5340
goto out;
5341
}
5342
5343
kvm_sigset_activate(vcpu);
5344
5345
/*
5346
* no need to check the return value of vcpu_start as it can only have
5347
* an error for protvirt, but protvirt implies user-controlled cpu state
5348
*/
5349
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
5350
kvm_s390_vcpu_start(vcpu);
5351
} else if (is_vcpu_stopped(vcpu)) {
5352
pr_err_ratelimited("can't run stopped vcpu %d\n",
5353
vcpu->vcpu_id);
5354
rc = -EINVAL;
5355
goto out;
5356
}
5357
5358
kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
5359
sync_regs(vcpu);
5360
enable_cpu_timer_accounting(vcpu);
5361
5362
might_fault();
5363
rc = __vcpu_run(vcpu);
5364
5365
if (signal_pending(current) && !rc) {
5366
kvm_run->exit_reason = KVM_EXIT_INTR;
5367
rc = -EINTR;
5368
}
5369
5370
if (guestdbg_exit_pending(vcpu) && !rc) {
5371
kvm_s390_prepare_debug_exit(vcpu);
5372
rc = 0;
5373
}
5374
5375
if (rc == -EREMOTE) {
5376
/* userspace support is needed, kvm_run has been prepared */
5377
rc = 0;
5378
}
5379
5380
disable_cpu_timer_accounting(vcpu);
5381
store_regs(vcpu);
5382
kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
5383
5384
kvm_sigset_deactivate(vcpu);
5385
5386
vcpu->stat.exit_userspace++;
5387
out:
5388
vcpu_put(vcpu);
5389
return rc;
5390
}
5391
5392
/*
5393
* store status at address
5394
* we have two special cases:
5395
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
5396
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
5397
*/
5398
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
5399
{
5400
unsigned char archmode = 1;
5401
freg_t fprs[NUM_FPRS];
5402
unsigned int px;
5403
u64 clkcomp, cputm;
5404
int rc;
5405
5406
px = kvm_s390_get_prefix(vcpu);
5407
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5408
if (write_guest_abs(vcpu, 163, &archmode, 1))
5409
return -EFAULT;
5410
gpa = 0;
5411
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5412
if (write_guest_real(vcpu, 163, &archmode, 1))
5413
return -EFAULT;
5414
gpa = px;
5415
} else
5416
gpa -= __LC_FPREGS_SAVE_AREA;
5417
5418
/* manually convert vector registers if necessary */
5419
if (cpu_has_vx()) {
5420
convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5421
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5422
fprs, 128);
5423
} else {
5424
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5425
vcpu->run->s.regs.fprs, 128);
5426
}
5427
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5428
vcpu->run->s.regs.gprs, 128);
5429
rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5430
&vcpu->arch.sie_block->gpsw, 16);
5431
rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5432
&px, 4);
5433
rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5434
&vcpu->run->s.regs.fpc, 4);
5435
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5436
&vcpu->arch.sie_block->todpr, 4);
5437
cputm = kvm_s390_get_cpu_timer(vcpu);
5438
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5439
&cputm, 8);
5440
clkcomp = vcpu->arch.sie_block->ckc >> 8;
5441
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5442
&clkcomp, 8);
5443
rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5444
&vcpu->run->s.regs.acrs, 64);
5445
rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5446
&vcpu->arch.sie_block->gcr, 128);
5447
return rc ? -EFAULT : 0;
5448
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	kvm_s390_fpu_store(vcpu->run);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
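
/*
 * Illustrative userspace sketch (not built with this file): how the
 * KVM_S390_STORE_STATUS ioctl, dispatched further down in
 * kvm_arch_vcpu_ioctl(), might be invoked. The ioctl argument is the guest
 * absolute address of the save area, or one of the two special values
 * handled above; the helper name is made up and the constants are assumed
 * to come from <linux/kvm.h>.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* store the register state of a (stopped) vcpu into its prefix area */
static int store_status_at_prefix(int vcpu_fd)
{
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
		     KVM_S390_STORE_STATUS_PREFIXED);
}
#endif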

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

		if (!is_vcpu_stopped(tmp)) {
			started_vcpus++;
			started_vcpu = tmp;
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
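
/*
 * Illustrative userspace sketch (not built with this file): enabling
 * KVM_CAP_S390_CSS_SUPPORT on a vcpu fd via KVM_ENABLE_CAP, which ends up in
 * kvm_vcpu_ioctl_enable_cap() above. The flags and args fields must be zero;
 * the helper name is made up for the sketch.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));	/* non-zero flags are rejected above */
	cap.cap = KVM_CAP_S390_CSS_SUPPORT;
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
#endif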

static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *sida_addr;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, sida_addr, mop->size))
			r = -EFAULT;

		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user(sida_addr, uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}

static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	enum gacc_mode acc_mode;
	void *tmpbuf = NULL;
	int r;

	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
					KVM_S390_MEMOP_F_CHECK_ONLY |
					KVM_S390_MEMOP_F_SKEY_PROTECTION);
	if (r)
		return r;
	if (mop->ar >= NUM_ACRS)
		return -EINVAL;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
				    acc_mode, mop->key);
		goto out_inject;
	}
	if (acc_mode == GACC_FETCH) {
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r)
			goto out_inject;
		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
	} else {
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
	}

out_inject:
	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

out_free:
	vfree(tmpbuf);
	return r;
}

static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
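
/*
 * Illustrative userspace sketch (not built with this file): a logical read
 * through the vcpu KVM_S390_MEM_OP ioctl, which is routed to
 * kvm_s390_vcpu_mem_op() above. The helper name is made up; it assumes the
 * uapi struct kvm_s390_mem_op from <linux/kvm.h> and uses access register 0
 * with no optional flags.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* read "len" bytes from guest logical address "gaddr" into "buf" */
static int memop_logical_read(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));
	op.op = KVM_S390_MEMOP_LOGICAL_READ;
	op.gaddr = gaddr;
	op.buf = (__u64)(unsigned long)buf;
	op.size = len;
	op.ar = 0;	/* access register 0, must be below NUM_ACRS */
	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}
#endif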

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int rc;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	default:
		rc = -ENOIOCTLCMD;
		break;
	}

	/*
	 * To simplify single stepping of userspace-emulated instructions,
	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
	 * should_handle_per_ifetch()). However, if userspace emulation injects
	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
	 * after (and not before) the interrupt delivery.
	 */
	if (!rc)
		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;

	return rc;
}
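
/*
 * Illustrative userspace sketch (not built with this file): injecting a
 * restart interrupt through the KVM_S390_IRQ ioctl handled above. The
 * helper name is made up; KVM_S390_RESTART needs no payload, so only the
 * type field of the uapi struct kvm_s390_irq is set.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int inject_restart(int vcpu_fd)
{
	struct kvm_s390_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.type = KVM_S390_RESTART;	/* no payload in irq.u for this type */
	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}
#endif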

static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
					struct kvm_pv_cmd *cmd)
{
	struct kvm_s390_pv_dmp dmp;
	void *data;
	int ret;

	/* Dump initialization is a prerequisite */
	if (!vcpu->kvm->arch.pv.dumping)
		return -EINVAL;

	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
		return -EFAULT;

	/* We only handle this subcmd right now */
	if (dmp.subcmd != KVM_PV_DUMP_CPU)
		return -EINVAL;

	/* CPU dump length is the same as create cpu storage donation. */
	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
		return -EINVAL;

	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);

	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
		   vcpu->vcpu_id, cmd->rc, cmd->rrc);

	if (ret)
		ret = -EINVAL;

	/* On success copy over the dump data */
	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
		ret = -EFAULT;

	kvfree(data);
	return ret;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = vcpu_dat_fault_handler(vcpu, arg, 0);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_PV_CPU_COMMAND: {
		struct kvm_pv_cmd cmd;

		r = -EINVAL;
		if (!is_prot_virt_host())
			break;

		r = -EFAULT;
		if (copy_from_user(&cmd, argp, sizeof(cmd)))
			break;

		r = -EINVAL;
		if (cmd.flags)
			break;

		/* We only handle this cmd right now */
		if (cmd.cmd != KVM_PV_DUMP)
			break;

		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);

		/* Always copy over UV rc / rrc data */
		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
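
/*
 * Illustrative userspace sketch (not built with this file): saving a vcpu's
 * pending local interrupts with KVM_S390_GET_IRQ_STATE, as handled in the
 * ioctl switch above. The helper name is made up; the flags field is left
 * zero because, as noted above, non-zero flags would break old QEMUs. See
 * Documentation/virt/kvm/api.rst for the return convention.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int get_irq_state(int vcpu_fd, struct kvm_s390_irq *irqs, __u32 len)
{
	struct kvm_s390_irq_state irq_state;

	memset(&irq_state, 0, sizeof(irq_state));
	irq_state.buf = (__u64)(unsigned long)irqs;
	irq_state.len = len;	/* buffer size in bytes, must not be zero */
	return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}
#endif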

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
{
	return true;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	gpa_t size;

	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
		/*
		 * A few sanity checks. Memory slots have to start and end on
		 * a segment boundary (1MB). The memory in userland may be
		 * fragmented across various different vmas. It is okay to
		 * mmap() and munmap() stuff in this slot at any time after
		 * doing this call.
		 */

		if (new->userspace_addr & 0xffffful)
			return -EINVAL;

		size = new->npages * PAGE_SIZE;
		if (size & 0xffffful)
			return -EINVAL;

		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
			return -EINVAL;
	}

	if (!kvm->arch.migration_mode)
		return 0;

	/*
	 * Turn off migration mode when:
	 * - userspace creates a new memslot with dirty logging off,
	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
	 *   dirty logging is turned off.
	 * Migration mode expects dirty page logging being enabled to store
	 * its dirty bitmap.
	 */
	if (change != KVM_MR_DELETE &&
	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
		WARN(kvm_s390_vm_stop_migration(kvm),
		     "Failed to stop migration mode");

	return 0;
}
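
/*
 * Illustrative userspace sketch (not built with this file): registering a
 * memory slot that satisfies the alignment checks above, i.e. a host address
 * and a size that are both multiples of the 1 MB segment size. The helper
 * name and the slot layout are made up; the mapping is over-allocated by one
 * segment so the start address can be rounded up to a 1 MB boundary.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#define SEG_SIZE	(1UL << 20)	/* s390 segment size, required alignment */
#define SLOT_SIZE	(16UL << 20)	/* 16 MB, a multiple of SEG_SIZE */

static int add_main_memslot(int vm_fd)
{
	struct kvm_userspace_memory_region region;
	unsigned long addr;
	void *mem;

	mem = mmap(NULL, SLOT_SIZE + SEG_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
	if (mem == MAP_FAILED)
		return -1;
	/* round up to the next segment boundary for userspace_addr */
	addr = ((unsigned long)mem + SEG_SIZE - 1) & ~(SEG_SIZE - 1);

	memset(&region, 0, sizeof(region));
	region.slot = 0;
	region.guest_phys_addr = 0;		/* must stay below the mem_limit */
	region.memory_size = SLOT_SIZE;		/* multiple of 1 MB */
	region.userspace_addr = addr;		/* 1 MB aligned host address */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
#endif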

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	if (kvm_is_ucontrol(kvm))
		return;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
				      new->base_gfn * PAGE_SIZE,
				      new->npages * PAGE_SIZE);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

static int __init kvm_s390_init(void)
{
	int i, r;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	r = __kvm_s390_init();
	if (r)
		return r;

	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (r) {
		__kvm_s390_exit();
		return r;
	}
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();

	__kvm_s390_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");