GitHub Repository: torvalds/linux
Path: blob/master/arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <[email protected]>
 *               Christian Borntraeger <[email protected]>
 *               Christian Ehrhardt <[email protected]>
 *               Jason J. Herne <[email protected]>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/cpufeature.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/asm.h>
#include <asm/fpu.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
68
KVM_GENERIC_VM_STATS(),
69
STATS_DESC_COUNTER(VM, inject_io),
70
STATS_DESC_COUNTER(VM, inject_float_mchk),
71
STATS_DESC_COUNTER(VM, inject_pfault_done),
72
STATS_DESC_COUNTER(VM, inject_service_signal),
73
STATS_DESC_COUNTER(VM, inject_virtio),
74
STATS_DESC_COUNTER(VM, aen_forward),
75
STATS_DESC_COUNTER(VM, gmap_shadow_reuse),
76
STATS_DESC_COUNTER(VM, gmap_shadow_create),
77
STATS_DESC_COUNTER(VM, gmap_shadow_r1_entry),
78
STATS_DESC_COUNTER(VM, gmap_shadow_r2_entry),
79
STATS_DESC_COUNTER(VM, gmap_shadow_r3_entry),
80
STATS_DESC_COUNTER(VM, gmap_shadow_sg_entry),
81
STATS_DESC_COUNTER(VM, gmap_shadow_pg_entry),
82
};
83
84
const struct kvm_stats_header kvm_vm_stats_header = {
85
.name_size = KVM_STATS_NAME_SIZE,
86
.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
87
.id_offset = sizeof(struct kvm_stats_header),
88
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
89
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
90
sizeof(kvm_vm_stats_desc),
91
};
92
93
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
94
KVM_GENERIC_VCPU_STATS(),
95
STATS_DESC_COUNTER(VCPU, exit_userspace),
96
STATS_DESC_COUNTER(VCPU, exit_null),
97
STATS_DESC_COUNTER(VCPU, exit_external_request),
98
STATS_DESC_COUNTER(VCPU, exit_io_request),
99
STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
100
STATS_DESC_COUNTER(VCPU, exit_stop_request),
101
STATS_DESC_COUNTER(VCPU, exit_validity),
102
STATS_DESC_COUNTER(VCPU, exit_instruction),
103
STATS_DESC_COUNTER(VCPU, exit_pei),
104
STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
105
STATS_DESC_COUNTER(VCPU, instruction_lctl),
106
STATS_DESC_COUNTER(VCPU, instruction_lctlg),
107
STATS_DESC_COUNTER(VCPU, instruction_stctl),
108
STATS_DESC_COUNTER(VCPU, instruction_stctg),
109
STATS_DESC_COUNTER(VCPU, exit_program_interruption),
110
STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
111
STATS_DESC_COUNTER(VCPU, exit_operation_exception),
112
STATS_DESC_COUNTER(VCPU, deliver_ckc),
113
STATS_DESC_COUNTER(VCPU, deliver_cputm),
114
STATS_DESC_COUNTER(VCPU, deliver_external_call),
115
STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
116
STATS_DESC_COUNTER(VCPU, deliver_service_signal),
117
STATS_DESC_COUNTER(VCPU, deliver_virtio),
118
STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
119
STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
120
STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
121
STATS_DESC_COUNTER(VCPU, deliver_program),
122
STATS_DESC_COUNTER(VCPU, deliver_io),
123
STATS_DESC_COUNTER(VCPU, deliver_machine_check),
124
STATS_DESC_COUNTER(VCPU, exit_wait_state),
125
STATS_DESC_COUNTER(VCPU, inject_ckc),
126
STATS_DESC_COUNTER(VCPU, inject_cputm),
127
STATS_DESC_COUNTER(VCPU, inject_external_call),
128
STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
129
STATS_DESC_COUNTER(VCPU, inject_mchk),
130
STATS_DESC_COUNTER(VCPU, inject_pfault_init),
131
STATS_DESC_COUNTER(VCPU, inject_program),
132
STATS_DESC_COUNTER(VCPU, inject_restart),
133
STATS_DESC_COUNTER(VCPU, inject_set_prefix),
134
STATS_DESC_COUNTER(VCPU, inject_stop_signal),
135
STATS_DESC_COUNTER(VCPU, instruction_epsw),
136
STATS_DESC_COUNTER(VCPU, instruction_gs),
137
STATS_DESC_COUNTER(VCPU, instruction_io_other),
138
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
139
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
140
STATS_DESC_COUNTER(VCPU, instruction_lpswey),
141
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
142
STATS_DESC_COUNTER(VCPU, instruction_ptff),
143
STATS_DESC_COUNTER(VCPU, instruction_sck),
144
STATS_DESC_COUNTER(VCPU, instruction_sckpf),
145
STATS_DESC_COUNTER(VCPU, instruction_stidp),
146
STATS_DESC_COUNTER(VCPU, instruction_spx),
147
STATS_DESC_COUNTER(VCPU, instruction_stpx),
148
STATS_DESC_COUNTER(VCPU, instruction_stap),
149
STATS_DESC_COUNTER(VCPU, instruction_iske),
150
STATS_DESC_COUNTER(VCPU, instruction_ri),
151
STATS_DESC_COUNTER(VCPU, instruction_rrbe),
152
STATS_DESC_COUNTER(VCPU, instruction_sske),
153
STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
154
STATS_DESC_COUNTER(VCPU, instruction_stsi),
155
STATS_DESC_COUNTER(VCPU, instruction_stfl),
156
STATS_DESC_COUNTER(VCPU, instruction_tb),
157
STATS_DESC_COUNTER(VCPU, instruction_tpi),
158
STATS_DESC_COUNTER(VCPU, instruction_tprot),
159
STATS_DESC_COUNTER(VCPU, instruction_tsch),
160
STATS_DESC_COUNTER(VCPU, instruction_sie),
161
STATS_DESC_COUNTER(VCPU, instruction_essa),
162
STATS_DESC_COUNTER(VCPU, instruction_sthyi),
163
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
164
STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
165
STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
166
STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
167
STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
168
STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
169
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
170
STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
171
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
172
STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
173
STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
174
STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
175
STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
176
STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
177
STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
178
STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
179
STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
180
STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
181
STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
182
STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
183
STATS_DESC_COUNTER(VCPU, diag_9c_forward),
184
STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
185
STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
186
STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
187
STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
188
STATS_DESC_COUNTER(VCPU, pfault_sync)
189
};
190
191
const struct kvm_stats_header kvm_vcpu_stats_header = {
192
.name_size = KVM_STATS_NAME_SIZE,
193
.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
194
.id_offset = sizeof(struct kvm_stats_header),
195
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
196
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
197
sizeof(kvm_vcpu_stats_desc),
198
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * allow asynchronous deinit for protected guests; enable by default since
 * the feature is opt-in anyway
 */
static int async_destroy = 1;
module_param(async_destroy, int, 0444);
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                     sizeof(stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                " lgr 0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                " plo 0,0,0,0(0)\n"
                CC_IPM(cc)
                : CC_OUT(cc, cc)
                : [function] "d" (function)
                : CC_CLOBBER_LIST("0"));
        return CC_TRANSFORM(cc) == 0;
}

static __always_inline void pfcr_query(u8 (*query)[16])
{
        asm volatile(
                " lghi 0,0\n"
                " .insn rsy,0xeb0000000016,0,0,%[query]\n"
                : [query] "=QS" (*query)
                :
                : "cc", "0");
}

static __always_inline void __sortl_query(u8 (*query)[32])
{
        asm volatile(
                " lghi 0,0\n"
                " la 1,%[query]\n"
                /* Parameter registers are ignored */
                " .insn rre,0xb9380000,2,4\n"
                : [query] "=R" (*query)
                :
                : "cc", "0", "1");
}

static __always_inline void __dfltcc_query(u8 (*query)[32])
{
        asm volatile(
                " lghi 0,0\n"
                " la 1,%[query]\n"
                /* Parameter registers are ignored */
                " .insn rrf,0xb9390000,2,4,6,0\n"
                : [query] "=R" (*query)
                :
                : "cc", "0", "1");
}

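/*
 * Probe the host for available CPU subfunctions (PLO, PTFF, CPACF, SORTL,
 * DFLTCC, PFCR) and SIE features, recording them in
 * kvm_s390_available_subfunc and kvm_s390_available_cpu_feat so they can be
 * offered to guests via the cpu model. Features that depend on the SIE
 * virtualization extensions are only advertised when the host provides them
 * and the 'nested' module parameter is set.
 */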
static void __init kvm_s390_cpu_feat_init(void)
390
{
391
int i;
392
393
for (i = 0; i < 256; ++i) {
394
if (plo_test_bit(i))
395
kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
396
}
397
398
if (test_facility(28)) /* TOD-clock steering */
399
ptff(kvm_s390_available_subfunc.ptff,
400
sizeof(kvm_s390_available_subfunc.ptff),
401
PTFF_QAF);
402
403
if (test_facility(17)) { /* MSA */
404
__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
405
kvm_s390_available_subfunc.kmac);
406
__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
407
kvm_s390_available_subfunc.kmc);
408
__cpacf_query(CPACF_KM, (cpacf_mask_t *)
409
kvm_s390_available_subfunc.km);
410
__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
411
kvm_s390_available_subfunc.kimd);
412
__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
413
kvm_s390_available_subfunc.klmd);
414
}
415
if (test_facility(76)) /* MSA3 */
416
__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
417
kvm_s390_available_subfunc.pckmo);
418
if (test_facility(77)) { /* MSA4 */
419
__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
420
kvm_s390_available_subfunc.kmctr);
421
__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
422
kvm_s390_available_subfunc.kmf);
423
__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
424
kvm_s390_available_subfunc.kmo);
425
__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
426
kvm_s390_available_subfunc.pcc);
427
}
428
if (test_facility(57)) /* MSA5 */
429
__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
430
kvm_s390_available_subfunc.ppno);
431
432
if (test_facility(146)) /* MSA8 */
433
__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
434
kvm_s390_available_subfunc.kma);
435
436
if (test_facility(155)) /* MSA9 */
437
__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
438
kvm_s390_available_subfunc.kdsa);
439
440
if (test_facility(150)) /* SORTL */
441
__sortl_query(&kvm_s390_available_subfunc.sortl);
442
443
if (test_facility(151)) /* DFLTCC */
444
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
445
446
if (test_facility(201)) /* PFCR */
447
pfcr_query(&kvm_s390_available_subfunc.pfcr);
448
449
if (machine_has_esop())
450
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
451
/*
452
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
453
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
454
*/
455
if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
456
!test_facility(3) || !nested)
457
return;
458
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
459
if (sclp.has_64bscao)
460
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
461
if (sclp.has_siif)
462
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
463
if (sclp.has_gpere)
464
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
465
if (sclp.has_gsls)
466
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
467
if (sclp.has_ib)
468
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
469
if (sclp.has_cei)
470
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
471
if (sclp.has_ibs)
472
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
473
if (sclp.has_kss)
474
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
475
/*
476
* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
477
* all skey handling functions read/set the skey from the PGSTE
478
* instead of the real storage key.
479
*
480
* KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
481
* pages being detected as preserved although they are resident.
482
*
483
* KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
484
* have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
485
*
486
* For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
487
* KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
488
* correctly shadowed. We can do that for the PGSTE but not for PTE.I.
489
*
490
* KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
491
* cannot easily shadow the SCA because of the ipte lock.
492
*/
493
}
494
495
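/*
 * Module init: set up the "kvm-trace" and "kvm-uv" s390 debug feature logs,
 * probe host CPU features, register the FLIC device ops, the zPCI AIFT (if
 * VFIO zPCI interpretation is built in), the GIB, the gmap PTE notifiers and
 * the TOD epoch delta notifier. Everything is rolled back on error.
 */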
static int __init __kvm_s390_init(void)
496
{
497
int rc = -ENOMEM;
498
499
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
500
if (!kvm_s390_dbf)
501
return -ENOMEM;
502
503
kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
504
if (!kvm_s390_dbf_uv)
505
goto err_kvm_uv;
506
507
if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
508
debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
509
goto err_debug_view;
510
511
kvm_s390_cpu_feat_init();
512
513
/* Register floating interrupt controller interface. */
514
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
515
if (rc) {
516
pr_err("A FLIC registration call failed with rc=%d\n", rc);
517
goto err_flic;
518
}
519
520
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
521
rc = kvm_s390_pci_init();
522
if (rc) {
523
pr_err("Unable to allocate AIFT for PCI\n");
524
goto err_pci;
525
}
526
}
527
528
rc = kvm_s390_gib_init(GAL_ISC);
529
if (rc)
530
goto err_gib;
531
532
gmap_notifier.notifier_call = kvm_gmap_notifier;
533
gmap_register_pte_notifier(&gmap_notifier);
534
vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
535
gmap_register_pte_notifier(&vsie_gmap_notifier);
536
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
537
&kvm_clock_notifier);
538
539
return 0;
540
541
err_gib:
542
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
543
kvm_s390_pci_exit();
544
err_pci:
545
err_flic:
546
err_debug_view:
547
debug_unregister(kvm_s390_dbf_uv);
548
err_kvm_uv:
549
debug_unregister(kvm_s390_dbf);
550
return rc;
551
}
552
553
static void __kvm_s390_exit(void)
554
{
555
gmap_unregister_pte_notifier(&gmap_notifier);
556
gmap_unregister_pte_notifier(&vsie_gmap_notifier);
557
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
558
&kvm_clock_notifier);
559
560
kvm_s390_gib_destroy();
561
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
562
kvm_s390_pci_exit();
563
debug_unregister(kvm_s390_dbf);
564
debug_unregister(kvm_s390_dbf_uv);
565
}
566
567
/* Section: device related */
568
long kvm_arch_dev_ioctl(struct file *filp,
569
unsigned int ioctl, unsigned long arg)
570
{
571
if (ioctl == KVM_S390_ENABLE_SIE)
572
return s390_enable_sie();
573
return -EINVAL;
574
}
575
576
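/*
 * Report which capabilities this host/VM supports. Userspace queries these
 * with the KVM_CHECK_EXTENSION ioctl; most entries return 0/1, while some
 * (e.g. KVM_CAP_S390_MEM_OP, KVM_CAP_MAX_VCPUS) return a limit instead.
 */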
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
577
{
578
int r;
579
580
switch (ext) {
581
case KVM_CAP_S390_PSW:
582
case KVM_CAP_S390_GMAP:
583
case KVM_CAP_SYNC_MMU:
584
#ifdef CONFIG_KVM_S390_UCONTROL
585
case KVM_CAP_S390_UCONTROL:
586
#endif
587
case KVM_CAP_ASYNC_PF:
588
case KVM_CAP_SYNC_REGS:
589
case KVM_CAP_ONE_REG:
590
case KVM_CAP_ENABLE_CAP:
591
case KVM_CAP_S390_CSS_SUPPORT:
592
case KVM_CAP_IOEVENTFD:
593
case KVM_CAP_S390_IRQCHIP:
594
case KVM_CAP_VM_ATTRIBUTES:
595
case KVM_CAP_MP_STATE:
596
case KVM_CAP_IMMEDIATE_EXIT:
597
case KVM_CAP_S390_INJECT_IRQ:
598
case KVM_CAP_S390_USER_SIGP:
599
case KVM_CAP_S390_USER_STSI:
600
case KVM_CAP_S390_SKEYS:
601
case KVM_CAP_S390_IRQ_STATE:
602
case KVM_CAP_S390_USER_INSTR0:
603
case KVM_CAP_S390_CMMA_MIGRATION:
604
case KVM_CAP_S390_AIS:
605
case KVM_CAP_S390_AIS_MIGRATION:
606
case KVM_CAP_S390_VCPU_RESETS:
607
case KVM_CAP_SET_GUEST_DEBUG:
608
case KVM_CAP_S390_DIAG318:
609
case KVM_CAP_IRQFD_RESAMPLE:
610
r = 1;
611
break;
612
case KVM_CAP_SET_GUEST_DEBUG2:
613
r = KVM_GUESTDBG_VALID_MASK;
614
break;
615
case KVM_CAP_S390_HPAGE_1M:
616
r = 0;
617
if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
618
r = 1;
619
break;
620
case KVM_CAP_S390_MEM_OP:
621
r = MEM_OP_MAX_SIZE;
622
break;
623
case KVM_CAP_S390_MEM_OP_EXTENSION:
624
/*
625
* Flag bits indicating which extensions are supported.
626
* If r > 0, the base extension must also be supported/indicated,
627
* in order to maintain backwards compatibility.
628
*/
629
r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
630
KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
631
break;
632
case KVM_CAP_NR_VCPUS:
633
case KVM_CAP_MAX_VCPUS:
634
case KVM_CAP_MAX_VCPU_ID:
635
r = KVM_S390_BSCA_CPU_SLOTS;
636
if (!kvm_s390_use_sca_entries())
637
r = KVM_MAX_VCPUS;
638
else if (sclp.has_esca && sclp.has_64bscao)
639
r = KVM_S390_ESCA_CPU_SLOTS;
640
if (ext == KVM_CAP_NR_VCPUS)
641
r = min_t(unsigned int, num_online_cpus(), r);
642
break;
643
case KVM_CAP_S390_COW:
644
r = machine_has_esop();
645
break;
646
case KVM_CAP_S390_VECTOR_REGISTERS:
647
r = test_facility(129);
648
break;
649
case KVM_CAP_S390_RI:
650
r = test_facility(64);
651
break;
652
case KVM_CAP_S390_GS:
653
r = test_facility(133);
654
break;
655
case KVM_CAP_S390_BPB:
656
r = test_facility(82);
657
break;
658
case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
659
r = async_destroy && is_prot_virt_host();
660
break;
661
case KVM_CAP_S390_PROTECTED:
662
r = is_prot_virt_host();
663
break;
664
case KVM_CAP_S390_PROTECTED_DUMP: {
665
u64 pv_cmds_dump[] = {
666
BIT_UVC_CMD_DUMP_INIT,
667
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
668
BIT_UVC_CMD_DUMP_CPU,
669
BIT_UVC_CMD_DUMP_COMPLETE,
670
};
671
int i;
672
673
r = is_prot_virt_host();
674
675
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
676
if (!test_bit_inv(pv_cmds_dump[i],
677
(unsigned long *)&uv_info.inst_calls_list)) {
678
r = 0;
679
break;
680
}
681
}
682
break;
683
}
684
case KVM_CAP_S390_ZPCI_OP:
685
r = kvm_s390_pci_interp_allowed();
686
break;
687
case KVM_CAP_S390_CPU_TOPOLOGY:
688
r = test_facility(11);
689
break;
690
default:
691
r = 0;
692
}
693
return r;
694
}
695
696
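/*
 * Transfer dirty information from the gmap to the memslot's dirty bitmap.
 * The slot is scanned in segments of _PAGE_ENTRIES pages; dirty bits
 * collected by gmap_sync_dirty_log_pmd() are forwarded via mark_page_dirty().
 */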
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
697
{
698
int i;
699
gfn_t cur_gfn, last_gfn;
700
unsigned long gaddr, vmaddr;
701
struct gmap *gmap = kvm->arch.gmap;
702
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
703
704
/* Loop over all guest segments */
705
cur_gfn = memslot->base_gfn;
706
last_gfn = memslot->base_gfn + memslot->npages;
707
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
708
gaddr = gfn_to_gpa(cur_gfn);
709
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
710
if (kvm_is_error_hva(vmaddr))
711
continue;
712
713
bitmap_zero(bitmap, _PAGE_ENTRIES);
714
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
715
for (i = 0; i < _PAGE_ENTRIES; i++) {
716
if (test_bit(i, bitmap))
717
mark_page_dirty(kvm, cur_gfn + i);
718
}
719
720
if (fatal_signal_pending(current))
721
return;
722
cond_resched();
723
}
724
}
725
726
/* Section: vm related */
727
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
728
729
/*
730
* Get (and clear) the dirty memory log for a memory slot.
731
*/
732
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
733
struct kvm_dirty_log *log)
734
{
735
int r;
736
unsigned long n;
737
struct kvm_memory_slot *memslot;
738
int is_dirty;
739
740
if (kvm_is_ucontrol(kvm))
741
return -EINVAL;
742
743
mutex_lock(&kvm->slots_lock);
744
745
r = -EINVAL;
746
if (log->slot >= KVM_USER_MEM_SLOTS)
747
goto out;
748
749
r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
750
if (r)
751
goto out;
752
753
/* Clear the dirty log */
754
if (is_dirty) {
755
n = kvm_dirty_bitmap_bytes(memslot);
756
memset(memslot->dirty_bitmap, 0, n);
757
}
758
r = 0;
759
out:
760
mutex_unlock(&kvm->slots_lock);
761
return r;
762
}
763
764
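/*
 * Request interception of operation exceptions on every vcpu, so that
 * instruction 0x0000 is forwarded to userspace (see KVM_CAP_S390_USER_INSTR0).
 */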
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
765
{
766
unsigned long i;
767
struct kvm_vcpu *vcpu;
768
769
kvm_for_each_vcpu(i, vcpu, kvm) {
770
kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
771
}
772
}
773
774
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
775
{
776
int r;
777
778
if (cap->flags)
779
return -EINVAL;
780
781
switch (cap->cap) {
782
case KVM_CAP_S390_IRQCHIP:
783
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
784
kvm->arch.use_irqchip = 1;
785
r = 0;
786
break;
787
case KVM_CAP_S390_USER_SIGP:
788
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
789
kvm->arch.user_sigp = 1;
790
r = 0;
791
break;
792
case KVM_CAP_S390_VECTOR_REGISTERS:
793
mutex_lock(&kvm->lock);
794
if (kvm->created_vcpus) {
795
r = -EBUSY;
796
} else if (cpu_has_vx()) {
797
set_kvm_facility(kvm->arch.model.fac_mask, 129);
798
set_kvm_facility(kvm->arch.model.fac_list, 129);
799
if (test_facility(134)) {
800
set_kvm_facility(kvm->arch.model.fac_mask, 134);
801
set_kvm_facility(kvm->arch.model.fac_list, 134);
802
}
803
if (test_facility(135)) {
804
set_kvm_facility(kvm->arch.model.fac_mask, 135);
805
set_kvm_facility(kvm->arch.model.fac_list, 135);
806
}
807
if (test_facility(148)) {
808
set_kvm_facility(kvm->arch.model.fac_mask, 148);
809
set_kvm_facility(kvm->arch.model.fac_list, 148);
810
}
811
if (test_facility(152)) {
812
set_kvm_facility(kvm->arch.model.fac_mask, 152);
813
set_kvm_facility(kvm->arch.model.fac_list, 152);
814
}
815
if (test_facility(192)) {
816
set_kvm_facility(kvm->arch.model.fac_mask, 192);
817
set_kvm_facility(kvm->arch.model.fac_list, 192);
818
}
819
if (test_facility(198)) {
820
set_kvm_facility(kvm->arch.model.fac_mask, 198);
821
set_kvm_facility(kvm->arch.model.fac_list, 198);
822
}
823
if (test_facility(199)) {
824
set_kvm_facility(kvm->arch.model.fac_mask, 199);
825
set_kvm_facility(kvm->arch.model.fac_list, 199);
826
}
827
r = 0;
828
} else
829
r = -EINVAL;
830
mutex_unlock(&kvm->lock);
831
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
832
r ? "(not available)" : "(success)");
833
break;
834
case KVM_CAP_S390_RI:
835
r = -EINVAL;
836
mutex_lock(&kvm->lock);
837
if (kvm->created_vcpus) {
838
r = -EBUSY;
839
} else if (test_facility(64)) {
840
set_kvm_facility(kvm->arch.model.fac_mask, 64);
841
set_kvm_facility(kvm->arch.model.fac_list, 64);
842
r = 0;
843
}
844
mutex_unlock(&kvm->lock);
845
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
846
r ? "(not available)" : "(success)");
847
break;
848
case KVM_CAP_S390_AIS:
849
mutex_lock(&kvm->lock);
850
if (kvm->created_vcpus) {
851
r = -EBUSY;
852
} else {
853
set_kvm_facility(kvm->arch.model.fac_mask, 72);
854
set_kvm_facility(kvm->arch.model.fac_list, 72);
855
r = 0;
856
}
857
mutex_unlock(&kvm->lock);
858
VM_EVENT(kvm, 3, "ENABLE: AIS %s",
859
r ? "(not available)" : "(success)");
860
break;
861
case KVM_CAP_S390_GS:
862
r = -EINVAL;
863
mutex_lock(&kvm->lock);
864
if (kvm->created_vcpus) {
865
r = -EBUSY;
866
} else if (test_facility(133)) {
867
set_kvm_facility(kvm->arch.model.fac_mask, 133);
868
set_kvm_facility(kvm->arch.model.fac_list, 133);
869
r = 0;
870
}
871
mutex_unlock(&kvm->lock);
872
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
873
r ? "(not available)" : "(success)");
874
break;
875
case KVM_CAP_S390_HPAGE_1M:
876
mutex_lock(&kvm->lock);
877
if (kvm->created_vcpus)
878
r = -EBUSY;
879
else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
880
r = -EINVAL;
881
else {
882
r = 0;
883
mmap_write_lock(kvm->mm);
884
kvm->mm->context.allow_gmap_hpage_1m = 1;
885
mmap_write_unlock(kvm->mm);
886
/*
887
* We might have to create fake 4k page
888
* tables. To avoid that the hardware works on
889
* stale PGSTEs, we emulate these instructions.
890
*/
891
kvm->arch.use_skf = 0;
892
kvm->arch.use_pfmfi = 0;
893
}
894
mutex_unlock(&kvm->lock);
895
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
896
r ? "(not available)" : "(success)");
897
break;
898
case KVM_CAP_S390_USER_STSI:
899
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
900
kvm->arch.user_stsi = 1;
901
r = 0;
902
break;
903
case KVM_CAP_S390_USER_INSTR0:
904
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
905
kvm->arch.user_instr0 = 1;
906
icpt_operexc_on_all_vcpus(kvm);
907
r = 0;
908
break;
909
case KVM_CAP_S390_CPU_TOPOLOGY:
910
r = -EINVAL;
911
mutex_lock(&kvm->lock);
912
if (kvm->created_vcpus) {
913
r = -EBUSY;
914
} else if (test_facility(11)) {
915
set_kvm_facility(kvm->arch.model.fac_mask, 11);
916
set_kvm_facility(kvm->arch.model.fac_list, 11);
917
r = 0;
918
}
919
mutex_unlock(&kvm->lock);
920
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
921
r ? "(not available)" : "(success)");
922
break;
923
default:
924
r = -EINVAL;
925
break;
926
}
927
return r;
928
}
929
930
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
931
{
932
int ret;
933
934
switch (attr->attr) {
935
case KVM_S390_VM_MEM_LIMIT_SIZE:
936
ret = 0;
937
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
938
kvm->arch.mem_limit);
939
if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
940
ret = -EFAULT;
941
break;
942
default:
943
ret = -ENXIO;
944
break;
945
}
946
return ret;
947
}
948
949
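/*
 * KVM_S390_VM_MEM_CTRL attribute setter: enable CMMA, reset CMMA state, or
 * change the guest memory limit (which replaces the VM's gmap). Enabling CMMA
 * and changing the memory limit are only possible before the first vcpu is
 * created.
 */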
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
950
{
951
int ret;
952
unsigned int idx;
953
switch (attr->attr) {
954
case KVM_S390_VM_MEM_ENABLE_CMMA:
955
ret = -ENXIO;
956
if (!sclp.has_cmma)
957
break;
958
959
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
960
mutex_lock(&kvm->lock);
961
if (kvm->created_vcpus)
962
ret = -EBUSY;
963
else if (kvm->mm->context.allow_gmap_hpage_1m)
964
ret = -EINVAL;
965
else {
966
kvm->arch.use_cmma = 1;
967
/* Not compatible with cmma. */
968
kvm->arch.use_pfmfi = 0;
969
ret = 0;
970
}
971
mutex_unlock(&kvm->lock);
972
break;
973
case KVM_S390_VM_MEM_CLR_CMMA:
974
ret = -ENXIO;
975
if (!sclp.has_cmma)
976
break;
977
ret = -EINVAL;
978
if (!kvm->arch.use_cmma)
979
break;
980
981
VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
982
mutex_lock(&kvm->lock);
983
idx = srcu_read_lock(&kvm->srcu);
984
s390_reset_cmma(kvm->arch.gmap->mm);
985
srcu_read_unlock(&kvm->srcu, idx);
986
mutex_unlock(&kvm->lock);
987
ret = 0;
988
break;
989
case KVM_S390_VM_MEM_LIMIT_SIZE: {
990
unsigned long new_limit;
991
992
if (kvm_is_ucontrol(kvm))
993
return -EINVAL;
994
995
if (get_user(new_limit, (u64 __user *)attr->addr))
996
return -EFAULT;
997
998
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
999
new_limit > kvm->arch.mem_limit)
1000
return -E2BIG;
1001
1002
if (!new_limit)
1003
return -EINVAL;
1004
1005
/* gmap_create takes last usable address */
1006
if (new_limit != KVM_S390_NO_MEM_LIMIT)
1007
new_limit -= 1;
1008
1009
ret = -EBUSY;
1010
mutex_lock(&kvm->lock);
1011
if (!kvm->created_vcpus) {
1012
/* gmap_create will round the limit up */
1013
struct gmap *new = gmap_create(current->mm, new_limit);
1014
1015
if (!new) {
1016
ret = -ENOMEM;
1017
} else {
1018
gmap_remove(kvm->arch.gmap);
1019
new->private = kvm;
1020
kvm->arch.gmap = new;
1021
ret = 0;
1022
}
1023
}
1024
mutex_unlock(&kvm->lock);
1025
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
1026
VM_EVENT(kvm, 3, "New guest asce: 0x%p",
1027
(void *) kvm->arch.gmap->asce);
1028
break;
1029
}
1030
default:
1031
ret = -ENXIO;
1032
break;
1033
}
1034
return ret;
1035
}
1036
1037
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
1038
1039
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
1040
{
1041
struct kvm_vcpu *vcpu;
1042
unsigned long i;
1043
1044
kvm_s390_vcpu_block_all(kvm);
1045
1046
kvm_for_each_vcpu(i, vcpu, kvm) {
1047
kvm_s390_vcpu_crypto_setup(vcpu);
1048
/* recreate the shadow crycb by leaving the VSIE handler */
1049
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1050
}
1051
1052
kvm_s390_vcpu_unblock_all(kvm);
1053
}
1054
1055
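/*
 * KVM_S390_VM_CRYPTO attribute setter: toggle AES/DEA key wrapping
 * (generating fresh random wrapping key masks when enabling) and AP
 * instruction interpretation (APIE), then force every vcpu to rebuild its
 * crypto control block.
 */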
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1056
{
1057
mutex_lock(&kvm->lock);
1058
switch (attr->attr) {
1059
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1060
if (!test_kvm_facility(kvm, 76)) {
1061
mutex_unlock(&kvm->lock);
1062
return -EINVAL;
1063
}
1064
get_random_bytes(
1065
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1066
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1067
kvm->arch.crypto.aes_kw = 1;
1068
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1069
break;
1070
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1071
if (!test_kvm_facility(kvm, 76)) {
1072
mutex_unlock(&kvm->lock);
1073
return -EINVAL;
1074
}
1075
get_random_bytes(
1076
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1077
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1078
kvm->arch.crypto.dea_kw = 1;
1079
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1080
break;
1081
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1082
if (!test_kvm_facility(kvm, 76)) {
1083
mutex_unlock(&kvm->lock);
1084
return -EINVAL;
1085
}
1086
kvm->arch.crypto.aes_kw = 0;
1087
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1088
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1089
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1090
break;
1091
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1092
if (!test_kvm_facility(kvm, 76)) {
1093
mutex_unlock(&kvm->lock);
1094
return -EINVAL;
1095
}
1096
kvm->arch.crypto.dea_kw = 0;
1097
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1098
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1099
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1100
break;
1101
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1102
if (!ap_instructions_available()) {
1103
mutex_unlock(&kvm->lock);
1104
return -EOPNOTSUPP;
1105
}
1106
kvm->arch.crypto.apie = 1;
1107
break;
1108
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1109
if (!ap_instructions_available()) {
1110
mutex_unlock(&kvm->lock);
1111
return -EOPNOTSUPP;
1112
}
1113
kvm->arch.crypto.apie = 0;
1114
break;
1115
default:
1116
mutex_unlock(&kvm->lock);
1117
return -ENXIO;
1118
}
1119
1120
kvm_s390_vcpu_crypto_reset_all(kvm);
1121
mutex_unlock(&kvm->lock);
1122
return 0;
1123
}
1124
1125
static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1126
{
1127
/* Only set the ECB bits after guest requests zPCI interpretation */
1128
if (!vcpu->kvm->arch.use_zpci_interp)
1129
return;
1130
1131
vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1132
vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1133
}
1134
1135
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1136
{
1137
struct kvm_vcpu *vcpu;
1138
unsigned long i;
1139
1140
lockdep_assert_held(&kvm->lock);
1141
1142
if (!kvm_s390_pci_interp_allowed())
1143
return;
1144
1145
/*
1146
* If host is configured for PCI and the necessary facilities are
1147
* available, turn on interpretation for the life of this guest
1148
*/
1149
kvm->arch.use_zpci_interp = 1;
1150
1151
kvm_s390_vcpu_block_all(kvm);
1152
1153
kvm_for_each_vcpu(i, vcpu, kvm) {
1154
kvm_s390_vcpu_pci_setup(vcpu);
1155
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1156
}
1157
1158
kvm_s390_vcpu_unblock_all(kvm);
1159
}
1160
1161
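/* Make the same synchronous request on every vcpu of the VM. */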
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1162
{
1163
unsigned long cx;
1164
struct kvm_vcpu *vcpu;
1165
1166
kvm_for_each_vcpu(cx, vcpu, kvm)
1167
kvm_s390_sync_request(req, vcpu);
1168
}
1169
1170
/*
1171
* Must be called with kvm->srcu held to avoid races on memslots, and with
1172
* kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1173
*/
1174
static int kvm_s390_vm_start_migration(struct kvm *kvm)
1175
{
1176
struct kvm_memory_slot *ms;
1177
struct kvm_memslots *slots;
1178
unsigned long ram_pages = 0;
1179
int bkt;
1180
1181
/* migration mode already enabled */
1182
if (kvm->arch.migration_mode)
1183
return 0;
1184
slots = kvm_memslots(kvm);
1185
if (!slots || kvm_memslots_empty(slots))
1186
return -EINVAL;
1187
1188
if (!kvm->arch.use_cmma) {
1189
kvm->arch.migration_mode = 1;
1190
return 0;
1191
}
1192
/* mark all the pages in active slots as dirty */
1193
kvm_for_each_memslot(ms, bkt, slots) {
1194
if (!ms->dirty_bitmap)
1195
return -EINVAL;
1196
/*
1197
* The second half of the bitmap is only used on x86,
1198
* and would be wasted otherwise, so we put it to good
1199
* use here to keep track of the state of the storage
1200
* attributes.
1201
*/
1202
memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1203
ram_pages += ms->npages;
1204
}
1205
atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1206
kvm->arch.migration_mode = 1;
1207
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1208
return 0;
1209
}
1210
1211
/*
1212
* Must be called with kvm->slots_lock to avoid races with ourselves and
1213
* kvm_s390_vm_start_migration.
1214
*/
1215
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1216
{
1217
/* migration mode already disabled */
1218
if (!kvm->arch.migration_mode)
1219
return 0;
1220
kvm->arch.migration_mode = 0;
1221
if (kvm->arch.use_cmma)
1222
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1223
return 0;
1224
}
1225
1226
static int kvm_s390_vm_set_migration(struct kvm *kvm,
1227
struct kvm_device_attr *attr)
1228
{
1229
int res = -ENXIO;
1230
1231
mutex_lock(&kvm->slots_lock);
1232
switch (attr->attr) {
1233
case KVM_S390_VM_MIGRATION_START:
1234
res = kvm_s390_vm_start_migration(kvm);
1235
break;
1236
case KVM_S390_VM_MIGRATION_STOP:
1237
res = kvm_s390_vm_stop_migration(kvm);
1238
break;
1239
default:
1240
break;
1241
}
1242
mutex_unlock(&kvm->slots_lock);
1243
1244
return res;
1245
}
1246
1247
static int kvm_s390_vm_get_migration(struct kvm *kvm,
1248
struct kvm_device_attr *attr)
1249
{
1250
u64 mig = kvm->arch.migration_mode;
1251
1252
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1253
return -ENXIO;
1254
1255
if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1256
return -EFAULT;
1257
return 0;
1258
}
1259
1260
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
1261
1262
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1263
{
1264
struct kvm_s390_vm_tod_clock gtod;
1265
1266
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1267
return -EFAULT;
1268
1269
if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1270
return -EINVAL;
1271
__kvm_s390_set_tod_clock(kvm, &gtod);
1272
1273
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1274
gtod.epoch_idx, gtod.tod);
1275
1276
return 0;
1277
}
1278
1279
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1280
{
1281
u8 gtod_high;
1282
1283
if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1284
sizeof(gtod_high)))
1285
return -EFAULT;
1286
1287
if (gtod_high != 0)
1288
return -EINVAL;
1289
VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1290
1291
return 0;
1292
}
1293
1294
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1295
{
1296
struct kvm_s390_vm_tod_clock gtod = { 0 };
1297
1298
if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1299
sizeof(gtod.tod)))
1300
return -EFAULT;
1301
1302
__kvm_s390_set_tod_clock(kvm, &gtod);
1303
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1304
return 0;
1305
}
1306
1307
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1308
{
1309
int ret;
1310
1311
if (attr->flags)
1312
return -EINVAL;
1313
1314
mutex_lock(&kvm->lock);
1315
/*
1316
* For protected guests, the TOD is managed by the ultravisor, so trying
1317
* to change it will never bring the expected results.
1318
*/
1319
if (kvm_s390_pv_is_protected(kvm)) {
1320
ret = -EOPNOTSUPP;
1321
goto out_unlock;
1322
}
1323
1324
switch (attr->attr) {
1325
case KVM_S390_VM_TOD_EXT:
1326
ret = kvm_s390_set_tod_ext(kvm, attr);
1327
break;
1328
case KVM_S390_VM_TOD_HIGH:
1329
ret = kvm_s390_set_tod_high(kvm, attr);
1330
break;
1331
case KVM_S390_VM_TOD_LOW:
1332
ret = kvm_s390_set_tod_low(kvm, attr);
1333
break;
1334
default:
1335
ret = -ENXIO;
1336
break;
1337
}
1338
1339
out_unlock:
1340
mutex_unlock(&kvm->lock);
1341
return ret;
1342
}
1343
1344
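/*
 * Compute the guest view of the TOD clock: host TOD plus the VM's epoch and,
 * if the multiple-epoch facility (139) is available, the epoch index with a
 * carry when the addition wrapped. Runs with preemption disabled so the
 * values stay consistent.
 */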
static void kvm_s390_get_tod_clock(struct kvm *kvm,
1345
struct kvm_s390_vm_tod_clock *gtod)
1346
{
1347
union tod_clock clk;
1348
1349
preempt_disable();
1350
1351
store_tod_clock_ext(&clk);
1352
1353
gtod->tod = clk.tod + kvm->arch.epoch;
1354
gtod->epoch_idx = 0;
1355
if (test_kvm_facility(kvm, 139)) {
1356
gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1357
if (gtod->tod < clk.tod)
1358
gtod->epoch_idx += 1;
1359
}
1360
1361
preempt_enable();
1362
}
1363
1364
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1365
{
1366
struct kvm_s390_vm_tod_clock gtod;
1367
1368
memset(&gtod, 0, sizeof(gtod));
1369
kvm_s390_get_tod_clock(kvm, &gtod);
1370
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1371
return -EFAULT;
1372
1373
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1374
gtod.epoch_idx, gtod.tod);
1375
return 0;
1376
}
1377
1378
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1379
{
1380
u8 gtod_high = 0;
1381
1382
if (copy_to_user((void __user *)attr->addr, &gtod_high,
1383
sizeof(gtod_high)))
1384
return -EFAULT;
1385
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1386
1387
return 0;
1388
}
1389
1390
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1391
{
1392
u64 gtod;
1393
1394
gtod = kvm_s390_get_tod_clock_fast(kvm);
1395
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1396
return -EFAULT;
1397
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1398
1399
return 0;
1400
}
1401
1402
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1403
{
1404
int ret;
1405
1406
if (attr->flags)
1407
return -EINVAL;
1408
1409
switch (attr->attr) {
1410
case KVM_S390_VM_TOD_EXT:
1411
ret = kvm_s390_get_tod_ext(kvm, attr);
1412
break;
1413
case KVM_S390_VM_TOD_HIGH:
1414
ret = kvm_s390_get_tod_high(kvm, attr);
1415
break;
1416
case KVM_S390_VM_TOD_LOW:
1417
ret = kvm_s390_get_tod_low(kvm, attr);
1418
break;
1419
default:
1420
ret = -ENXIO;
1421
break;
1422
}
1423
return ret;
1424
}
1425
1426
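/*
 * KVM_S390_VM_CPU_PROCESSOR setter: copy the userspace-provided cpuid, IBC
 * value (clamped to the host's lowest/unblocked IBC range) and facility list
 * into the VM model. Only allowed before the first vcpu is created.
 */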
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1427
{
1428
struct kvm_s390_vm_cpu_processor *proc;
1429
u16 lowest_ibc, unblocked_ibc;
1430
int ret = 0;
1431
1432
mutex_lock(&kvm->lock);
1433
if (kvm->created_vcpus) {
1434
ret = -EBUSY;
1435
goto out;
1436
}
1437
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1438
if (!proc) {
1439
ret = -ENOMEM;
1440
goto out;
1441
}
1442
if (!copy_from_user(proc, (void __user *)attr->addr,
1443
sizeof(*proc))) {
1444
kvm->arch.model.cpuid = proc->cpuid;
1445
lowest_ibc = sclp.ibc >> 16 & 0xfff;
1446
unblocked_ibc = sclp.ibc & 0xfff;
1447
if (lowest_ibc && proc->ibc) {
1448
if (proc->ibc > unblocked_ibc)
1449
kvm->arch.model.ibc = unblocked_ibc;
1450
else if (proc->ibc < lowest_ibc)
1451
kvm->arch.model.ibc = lowest_ibc;
1452
else
1453
kvm->arch.model.ibc = proc->ibc;
1454
}
1455
memcpy(kvm->arch.model.fac_list, proc->fac_list,
1456
S390_ARCH_FAC_LIST_SIZE_BYTE);
1457
VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458
kvm->arch.model.ibc,
1459
kvm->arch.model.cpuid);
1460
VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461
kvm->arch.model.fac_list[0],
1462
kvm->arch.model.fac_list[1],
1463
kvm->arch.model.fac_list[2]);
1464
} else
1465
ret = -EFAULT;
1466
kfree(proc);
1467
out:
1468
mutex_unlock(&kvm->lock);
1469
return ret;
1470
}
1471
1472
static int kvm_s390_set_processor_feat(struct kvm *kvm,
1473
struct kvm_device_attr *attr)
1474
{
1475
struct kvm_s390_vm_cpu_feat data;
1476
1477
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1478
return -EFAULT;
1479
if (!bitmap_subset((unsigned long *) data.feat,
1480
kvm_s390_available_cpu_feat,
1481
KVM_S390_VM_CPU_FEAT_NR_BITS))
1482
return -EINVAL;
1483
1484
mutex_lock(&kvm->lock);
1485
if (kvm->created_vcpus) {
1486
mutex_unlock(&kvm->lock);
1487
return -EBUSY;
1488
}
1489
bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1490
mutex_unlock(&kvm->lock);
1491
VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492
data.feat[0],
1493
data.feat[1],
1494
data.feat[2]);
1495
return 0;
1496
}
1497
1498
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1499
struct kvm_device_attr *attr)
1500
{
1501
mutex_lock(&kvm->lock);
1502
if (kvm->created_vcpus) {
1503
mutex_unlock(&kvm->lock);
1504
return -EBUSY;
1505
}
1506
1507
if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1508
sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1509
mutex_unlock(&kvm->lock);
1510
return -EFAULT;
1511
}
1512
mutex_unlock(&kvm->lock);
1513
1514
VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1515
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1516
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1517
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1518
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1519
VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1520
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1521
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1522
VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1523
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1524
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1525
VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1526
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1527
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1528
VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1529
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1530
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1531
VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1532
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1533
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1534
VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1535
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1536
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1537
VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1538
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1539
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1540
VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1541
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1542
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1543
VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1544
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1545
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1546
VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1547
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1548
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1549
VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1550
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1551
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1552
VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1553
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1554
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1555
VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1556
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1557
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1558
VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1559
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1560
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1561
VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1562
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1563
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1564
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1565
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1566
VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1567
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1568
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1569
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1570
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1571
VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
1572
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1573
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1574
1575
return 0;
1576
}
1577
1578
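/* UV feature bits (AP and AP interrupt) that userspace may enable for guests. */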
#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
1579
( \
1580
((struct kvm_s390_vm_cpu_uv_feat){ \
1581
.ap = 1, \
1582
.ap_intr = 1, \
1583
}) \
1584
.feat \
1585
)
1586
1587
static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1588
{
1589
struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
1590
unsigned long data, filter;
1591
1592
filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1593
if (get_user(data, &ptr->feat))
1594
return -EFAULT;
1595
if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
1596
return -EINVAL;
1597
1598
mutex_lock(&kvm->lock);
1599
if (kvm->created_vcpus) {
1600
mutex_unlock(&kvm->lock);
1601
return -EBUSY;
1602
}
1603
kvm->arch.model.uv_feat_guest.feat = data;
1604
mutex_unlock(&kvm->lock);
1605
1606
VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
1607
1608
return 0;
1609
}
1610
1611
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1612
{
1613
int ret = -ENXIO;
1614
1615
switch (attr->attr) {
1616
case KVM_S390_VM_CPU_PROCESSOR:
1617
ret = kvm_s390_set_processor(kvm, attr);
1618
break;
1619
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1620
ret = kvm_s390_set_processor_feat(kvm, attr);
1621
break;
1622
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1623
ret = kvm_s390_set_processor_subfunc(kvm, attr);
1624
break;
1625
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1626
ret = kvm_s390_set_uv_feat(kvm, attr);
1627
break;
1628
}
1629
return ret;
1630
}
1631
1632
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1633
{
1634
struct kvm_s390_vm_cpu_processor *proc;
1635
int ret = 0;
1636
1637
proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1638
if (!proc) {
1639
ret = -ENOMEM;
1640
goto out;
1641
}
1642
proc->cpuid = kvm->arch.model.cpuid;
1643
proc->ibc = kvm->arch.model.ibc;
1644
memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1645
S390_ARCH_FAC_LIST_SIZE_BYTE);
1646
VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1647
kvm->arch.model.ibc,
1648
kvm->arch.model.cpuid);
1649
VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1650
kvm->arch.model.fac_list[0],
1651
kvm->arch.model.fac_list[1],
1652
kvm->arch.model.fac_list[2]);
1653
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1654
ret = -EFAULT;
1655
kfree(proc);
1656
out:
1657
return ret;
1658
}
1659
1660
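/*
 * KVM_S390_VM_CPU_MACHINE getter: report host-level model data (host cpuid,
 * SCLP IBC and the full host facility mask/list), as opposed to the per-VM
 * processor data returned by kvm_s390_get_processor().
 */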
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1661
{
1662
struct kvm_s390_vm_cpu_machine *mach;
1663
int ret = 0;
1664
1665
mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1666
if (!mach) {
1667
ret = -ENOMEM;
1668
goto out;
1669
}
1670
get_cpu_id((struct cpuid *) &mach->cpuid);
1671
mach->ibc = sclp.ibc;
1672
memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1673
S390_ARCH_FAC_LIST_SIZE_BYTE);
1674
memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1675
sizeof(stfle_fac_list));
1676
VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1677
kvm->arch.model.ibc,
1678
kvm->arch.model.cpuid);
1679
VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1680
mach->fac_mask[0],
1681
mach->fac_mask[1],
1682
mach->fac_mask[2]);
1683
VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1684
mach->fac_list[0],
1685
mach->fac_list[1],
1686
mach->fac_list[2]);
1687
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1688
ret = -EFAULT;
1689
kfree(mach);
1690
out:
1691
return ret;
1692
}
1693
1694
static int kvm_s390_get_processor_feat(struct kvm *kvm,
1695
struct kvm_device_attr *attr)
1696
{
1697
struct kvm_s390_vm_cpu_feat data;
1698
1699
bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1700
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1701
return -EFAULT;
1702
VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1703
data.feat[0],
1704
data.feat[1],
1705
data.feat[2]);
1706
return 0;
1707
}
1708
1709
static int kvm_s390_get_machine_feat(struct kvm *kvm,
1710
struct kvm_device_attr *attr)
1711
{
1712
struct kvm_s390_vm_cpu_feat data;
1713
1714
bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1715
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1716
return -EFAULT;
1717
VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1718
data.feat[0],
1719
data.feat[1],
1720
data.feat[2]);
1721
return 0;
1722
}
1723
1724
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1725
struct kvm_device_attr *attr)
1726
{
1727
if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1728
sizeof(struct kvm_s390_vm_cpu_subfunc)))
1729
return -EFAULT;
1730
1731
VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1732
((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1733
((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1734
((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1735
((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1736
VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1737
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1738
((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1739
VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1740
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1741
((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1742
VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1743
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1744
((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1745
VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1746
((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1747
((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1748
VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1749
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1750
((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1751
VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1752
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1753
((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1754
VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1755
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1756
((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1757
VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1758
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1759
((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1760
VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1761
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1762
((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1763
VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1764
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1765
((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1766
VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1767
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1768
((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1769
VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1770
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1771
((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1772
VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1773
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1774
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1775
VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1776
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1777
((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1778
VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1779
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1780
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1781
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1782
((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1783
VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1784
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1785
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1786
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1787
((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1788
VM_EVENT(kvm, 3, "GET: guest PFCR subfunc 0x%16.16lx.%16.16lx",
1789
((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[0],
1790
((unsigned long *) &kvm->arch.model.subfuncs.pfcr)[1]);
1791
1792
return 0;
1793
}
1794
1795
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1796
struct kvm_device_attr *attr)
1797
{
1798
if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1799
sizeof(struct kvm_s390_vm_cpu_subfunc)))
1800
return -EFAULT;
1801
1802
VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1803
((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1804
((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1805
((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1806
((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1807
VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1808
((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1809
((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1810
VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1811
((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1812
((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1813
VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1814
((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1815
((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1816
VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1817
((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1818
((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1819
VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1820
((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1821
((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1822
VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1823
((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1824
((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1825
VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1826
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1827
((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1828
VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1829
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1830
((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1831
VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1832
((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1833
((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1834
VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1835
((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1836
((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1837
VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1838
((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1839
((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1840
VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1841
((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1842
((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1843
VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1844
((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1845
((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1846
VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1847
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1848
((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1849
VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1850
((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1851
((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1852
((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1853
((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1854
VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1855
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1856
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1857
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1858
((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1859
VM_EVENT(kvm, 3, "GET: host PFCR subfunc 0x%16.16lx.%16.16lx",
1860
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[0],
1861
((unsigned long *) &kvm_s390_available_subfunc.pfcr)[1]);
1862
1863
return 0;
1864
}
1865
1866
static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1867
{
1868
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1869
unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
1870
1871
if (put_user(feat, &dst->feat))
1872
return -EFAULT;
1873
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1874
1875
return 0;
1876
}
1877
1878
static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
1879
{
1880
struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
1881
unsigned long feat;
1882
1883
BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
1884
1885
feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
1886
if (put_user(feat, &dst->feat))
1887
return -EFAULT;
1888
VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
1889
1890
return 0;
1891
}
1892
1893
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1894
{
1895
int ret = -ENXIO;
1896
1897
switch (attr->attr) {
1898
case KVM_S390_VM_CPU_PROCESSOR:
1899
ret = kvm_s390_get_processor(kvm, attr);
1900
break;
1901
case KVM_S390_VM_CPU_MACHINE:
1902
ret = kvm_s390_get_machine(kvm, attr);
1903
break;
1904
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1905
ret = kvm_s390_get_processor_feat(kvm, attr);
1906
break;
1907
case KVM_S390_VM_CPU_MACHINE_FEAT:
1908
ret = kvm_s390_get_machine_feat(kvm, attr);
1909
break;
1910
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1911
ret = kvm_s390_get_processor_subfunc(kvm, attr);
1912
break;
1913
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1914
ret = kvm_s390_get_machine_subfunc(kvm, attr);
1915
break;
1916
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
1917
ret = kvm_s390_get_processor_uv_feat(kvm, attr);
1918
break;
1919
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
1920
ret = kvm_s390_get_machine_uv_feat(kvm, attr);
1921
break;
1922
}
1923
return ret;
1924
}
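/*
 * Illustrative userspace sketch (vm_fd is assumed to be a VM file descriptor
 * from KVM_CREATE_VM; error handling is omitted): the CPU-model attributes
 * handled above are reached through the generic device-attribute ioctls on
 * the VM fd, e.g. to read the host's available guest CPU features:
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr  = (__u64)&feat,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *
 * KVM_HAS_DEVICE_ATTR with the same group/attr pair can be used beforehand
 * to probe whether an attribute is supported at all.
 */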
1925
1926
/**
1927
* kvm_s390_update_topology_change_report - update CPU topology change report
1928
* @kvm: guest KVM description
1929
* @val: set or clear the MTCR bit
1930
*
1931
* Updates the Multiprocessor Topology-Change-Report bit to signal
1932
* the guest with a topology change.
1933
* This is only relevant if the topology facility is present.
1934
*
1935
* The SCA version, bsca or esca, doesn't matter as the offset is the same.
1936
*/
1937
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1938
{
1939
union sca_utility new, old;
1940
struct bsca_block *sca;
1941
1942
read_lock(&kvm->arch.sca_lock);
1943
sca = kvm->arch.sca;
1944
old = READ_ONCE(sca->utility);
1945
do {
1946
new = old;
1947
new.mtcr = val;
1948
} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
1949
read_unlock(&kvm->arch.sca_lock);
1950
}
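/*
 * Note on the update above: the SCA read lock only prevents the SCA from
 * being replaced (a bsca may be swapped for an esca concurrently), while the
 * MTCR bit itself is updated with a try_cmpxchg() retry loop so that
 * concurrent writers to the utility word cannot lose each other's updates.
 */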
1951
1952
static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1953
struct kvm_device_attr *attr)
1954
{
1955
if (!test_kvm_facility(kvm, 11))
1956
return -ENXIO;
1957
1958
kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1959
return 0;
1960
}
1961
1962
static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1963
struct kvm_device_attr *attr)
1964
{
1965
u8 topo;
1966
1967
if (!test_kvm_facility(kvm, 11))
1968
return -ENXIO;
1969
1970
read_lock(&kvm->arch.sca_lock);
1971
topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1972
read_unlock(&kvm->arch.sca_lock);
1973
1974
return put_user(topo, (u8 __user *)attr->addr);
1975
}
1976
1977
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1978
{
1979
int ret;
1980
1981
switch (attr->group) {
1982
case KVM_S390_VM_MEM_CTRL:
1983
ret = kvm_s390_set_mem_control(kvm, attr);
1984
break;
1985
case KVM_S390_VM_TOD:
1986
ret = kvm_s390_set_tod(kvm, attr);
1987
break;
1988
case KVM_S390_VM_CPU_MODEL:
1989
ret = kvm_s390_set_cpu_model(kvm, attr);
1990
break;
1991
case KVM_S390_VM_CRYPTO:
1992
ret = kvm_s390_vm_set_crypto(kvm, attr);
1993
break;
1994
case KVM_S390_VM_MIGRATION:
1995
ret = kvm_s390_vm_set_migration(kvm, attr);
1996
break;
1997
case KVM_S390_VM_CPU_TOPOLOGY:
1998
ret = kvm_s390_set_topo_change_indication(kvm, attr);
1999
break;
2000
default:
2001
ret = -ENXIO;
2002
break;
2003
}
2004
2005
return ret;
2006
}
2007
2008
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2009
{
2010
int ret;
2011
2012
switch (attr->group) {
2013
case KVM_S390_VM_MEM_CTRL:
2014
ret = kvm_s390_get_mem_control(kvm, attr);
2015
break;
2016
case KVM_S390_VM_TOD:
2017
ret = kvm_s390_get_tod(kvm, attr);
2018
break;
2019
case KVM_S390_VM_CPU_MODEL:
2020
ret = kvm_s390_get_cpu_model(kvm, attr);
2021
break;
2022
case KVM_S390_VM_MIGRATION:
2023
ret = kvm_s390_vm_get_migration(kvm, attr);
2024
break;
2025
case KVM_S390_VM_CPU_TOPOLOGY:
2026
ret = kvm_s390_get_topo_change_indication(kvm, attr);
2027
break;
2028
default:
2029
ret = -ENXIO;
2030
break;
2031
}
2032
2033
return ret;
2034
}
2035
2036
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
2037
{
2038
int ret;
2039
2040
switch (attr->group) {
2041
case KVM_S390_VM_MEM_CTRL:
2042
switch (attr->attr) {
2043
case KVM_S390_VM_MEM_ENABLE_CMMA:
2044
case KVM_S390_VM_MEM_CLR_CMMA:
2045
ret = sclp.has_cmma ? 0 : -ENXIO;
2046
break;
2047
case KVM_S390_VM_MEM_LIMIT_SIZE:
2048
ret = 0;
2049
break;
2050
default:
2051
ret = -ENXIO;
2052
break;
2053
}
2054
break;
2055
case KVM_S390_VM_TOD:
2056
switch (attr->attr) {
2057
case KVM_S390_VM_TOD_LOW:
2058
case KVM_S390_VM_TOD_HIGH:
2059
ret = 0;
2060
break;
2061
default:
2062
ret = -ENXIO;
2063
break;
2064
}
2065
break;
2066
case KVM_S390_VM_CPU_MODEL:
2067
switch (attr->attr) {
2068
case KVM_S390_VM_CPU_PROCESSOR:
2069
case KVM_S390_VM_CPU_MACHINE:
2070
case KVM_S390_VM_CPU_PROCESSOR_FEAT:
2071
case KVM_S390_VM_CPU_MACHINE_FEAT:
2072
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
2073
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
2074
case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
2075
case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
2076
ret = 0;
2077
break;
2078
default:
2079
ret = -ENXIO;
2080
break;
2081
}
2082
break;
2083
case KVM_S390_VM_CRYPTO:
2084
switch (attr->attr) {
2085
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
2086
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
2087
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
2088
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
2089
ret = 0;
2090
break;
2091
case KVM_S390_VM_CRYPTO_ENABLE_APIE:
2092
case KVM_S390_VM_CRYPTO_DISABLE_APIE:
2093
ret = ap_instructions_available() ? 0 : -ENXIO;
2094
break;
2095
default:
2096
ret = -ENXIO;
2097
break;
2098
}
2099
break;
2100
case KVM_S390_VM_MIGRATION:
2101
ret = 0;
2102
break;
2103
case KVM_S390_VM_CPU_TOPOLOGY:
2104
ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
2105
break;
2106
default:
2107
ret = -ENXIO;
2108
break;
2109
}
2110
2111
return ret;
2112
}
2113
2114
static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2115
{
2116
uint8_t *keys;
2117
uint64_t hva;
2118
int srcu_idx, i, r = 0;
2119
2120
if (args->flags != 0)
2121
return -EINVAL;
2122
2123
/* Is this guest using storage keys? */
2124
if (!mm_uses_skeys(current->mm))
2125
return KVM_S390_GET_SKEYS_NONE;
2126
2127
/* Enforce sane limit on memory allocation */
2128
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2129
return -EINVAL;
2130
2131
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2132
if (!keys)
2133
return -ENOMEM;
2134
2135
mmap_read_lock(current->mm);
2136
srcu_idx = srcu_read_lock(&kvm->srcu);
2137
for (i = 0; i < args->count; i++) {
2138
hva = gfn_to_hva(kvm, args->start_gfn + i);
2139
if (kvm_is_error_hva(hva)) {
2140
r = -EFAULT;
2141
break;
2142
}
2143
2144
r = get_guest_storage_key(current->mm, hva, &keys[i]);
2145
if (r)
2146
break;
2147
}
2148
srcu_read_unlock(&kvm->srcu, srcu_idx);
2149
mmap_read_unlock(current->mm);
2150
2151
if (!r) {
2152
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2153
sizeof(uint8_t) * args->count);
2154
if (r)
2155
r = -EFAULT;
2156
}
2157
2158
kvfree(keys);
2159
return r;
2160
}
2161
2162
static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2163
{
2164
uint8_t *keys;
2165
uint64_t hva;
2166
int srcu_idx, i, r = 0;
2167
bool unlocked;
2168
2169
if (args->flags != 0)
2170
return -EINVAL;
2171
2172
/* Enforce sane limit on memory allocation */
2173
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2174
return -EINVAL;
2175
2176
keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2177
if (!keys)
2178
return -ENOMEM;
2179
2180
r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2181
sizeof(uint8_t) * args->count);
2182
if (r) {
2183
r = -EFAULT;
2184
goto out;
2185
}
2186
2187
/* Enable storage key handling for the guest */
2188
r = s390_enable_skey();
2189
if (r)
2190
goto out;
2191
2192
i = 0;
2193
mmap_read_lock(current->mm);
2194
srcu_idx = srcu_read_lock(&kvm->srcu);
2195
while (i < args->count) {
2196
unlocked = false;
2197
hva = gfn_to_hva(kvm, args->start_gfn + i);
2198
if (kvm_is_error_hva(hva)) {
2199
r = -EFAULT;
2200
break;
2201
}
2202
2203
/* Lowest order bit is reserved */
2204
if (keys[i] & 0x01) {
2205
r = -EINVAL;
2206
break;
2207
}
2208
2209
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2210
if (r) {
2211
r = fixup_user_fault(current->mm, hva,
2212
FAULT_FLAG_WRITE, &unlocked);
2213
if (r)
2214
break;
2215
}
2216
if (!r)
2217
i++;
2218
}
2219
srcu_read_unlock(&kvm->srcu, srcu_idx);
2220
mmap_read_unlock(current->mm);
2221
out:
2222
kvfree(keys);
2223
return r;
2224
}
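/*
 * Illustrative sketch of the matching userspace side (vm_fd is assumed to be
 * a VM file descriptor; error handling is omitted): storage keys are read
 * and written in bulk through KVM_S390_GET_SKEYS / KVM_S390_SET_SKEYS with
 * one key byte per guest page:
 *
 *	uint8_t keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 128,
 *		.skeydata_addr = (__u64)keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE tells userspace that the guest
 * never used storage keys, so there is nothing to transfer.
 */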
2225
2226
/*
2227
* Base address and length must be sent at the start of each block, therefore
2228
* it's cheaper to send some clean data, as long as it's less than the size of
2229
* two longs.
2230
*/
2231
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2232
/* for consistency */
2233
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
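/*
 * Worked example for the comment above: 2 * sizeof(void *) is 16 on s390x,
 * so a run of up to 15 clean values between two dirty ones is still stored
 * in the current block; describing a fresh block (its base address and
 * length) would cost about as much data as simply sending the clean bytes.
 */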
2234
2235
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2236
u8 *res, unsigned long bufsize)
2237
{
2238
unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2239
2240
args->count = 0;
2241
while (args->count < bufsize) {
2242
hva = gfn_to_hva(kvm, cur_gfn);
2243
/*
2244
* We return an error if the first value was invalid, but we
2245
* return successfully if at least one value was copied.
2246
*/
2247
if (kvm_is_error_hva(hva))
2248
return args->count ? 0 : -EFAULT;
2249
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2250
pgstev = 0;
2251
res[args->count++] = (pgstev >> 24) & 0x43;
2252
cur_gfn++;
2253
}
2254
2255
return 0;
2256
}
2257
2258
static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2259
gfn_t gfn)
2260
{
2261
return ____gfn_to_memslot(slots, gfn, true);
2262
}
2263
2264
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2265
unsigned long cur_gfn)
2266
{
2267
struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2268
unsigned long ofs = cur_gfn - ms->base_gfn;
2269
struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2270
2271
if (ms->base_gfn + ms->npages <= cur_gfn) {
2272
mnode = rb_next(mnode);
2273
/* If we are above the highest slot, wrap around */
2274
if (!mnode)
2275
mnode = rb_first(&slots->gfn_tree);
2276
2277
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2278
ofs = 0;
2279
}
2280
2281
if (cur_gfn < ms->base_gfn)
2282
ofs = 0;
2283
2284
ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2285
while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2286
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2287
ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2288
}
2289
return ms->base_gfn + ofs;
2290
}
2291
2292
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2293
u8 *res, unsigned long bufsize)
2294
{
2295
unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2296
struct kvm_memslots *slots = kvm_memslots(kvm);
2297
struct kvm_memory_slot *ms;
2298
2299
if (unlikely(kvm_memslots_empty(slots)))
2300
return 0;
2301
2302
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2303
ms = gfn_to_memslot(kvm, cur_gfn);
2304
args->count = 0;
2305
args->start_gfn = cur_gfn;
2306
if (!ms)
2307
return 0;
2308
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2309
mem_end = kvm_s390_get_gfn_end(slots);
2310
2311
while (args->count < bufsize) {
2312
hva = gfn_to_hva(kvm, cur_gfn);
2313
if (kvm_is_error_hva(hva))
2314
return 0;
2315
/* Decrement only if we actually flipped the bit to 0 */
2316
if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2317
atomic64_dec(&kvm->arch.cmma_dirty_pages);
2318
if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2319
pgstev = 0;
2320
/* Save the value */
2321
res[args->count++] = (pgstev >> 24) & 0x43;
2322
/* If the next bit is too far away, stop. */
2323
if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2324
return 0;
2325
/* If we reached the previous "next", find the next one */
2326
if (cur_gfn == next_gfn)
2327
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2328
/* Reached the end of memory or of the buffer, stop */
2329
if ((next_gfn >= mem_end) ||
2330
(next_gfn - args->start_gfn >= bufsize))
2331
return 0;
2332
cur_gfn++;
2333
/* Reached the end of the current memslot, take the next one. */
2334
if (cur_gfn - ms->base_gfn >= ms->npages) {
2335
ms = gfn_to_memslot(kvm, cur_gfn);
2336
if (!ms)
2337
return 0;
2338
}
2339
}
2340
return 0;
2341
}
2342
2343
/*
2344
* This function searches for the next page with dirty CMMA attributes, and
2345
* saves the attributes in the buffer up to either the end of the buffer or
2346
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2347
* no trailing clean bytes are saved.
2348
* In case no dirty bits were found, or if CMMA was not enabled or used, the
2349
* output buffer will indicate 0 as length.
2350
*/
2351
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2352
struct kvm_s390_cmma_log *args)
2353
{
2354
unsigned long bufsize;
2355
int srcu_idx, peek, ret;
2356
u8 *values;
2357
2358
if (!kvm->arch.use_cmma)
2359
return -ENXIO;
2360
/* Invalid/unsupported flags were specified */
2361
if (args->flags & ~KVM_S390_CMMA_PEEK)
2362
return -EINVAL;
2363
/* Migration mode query, and we are not doing a migration */
2364
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2365
if (!peek && !kvm->arch.migration_mode)
2366
return -EINVAL;
2367
/* CMMA is disabled or was not used, or the buffer has length zero */
2368
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2369
if (!bufsize || !kvm->mm->context.uses_cmm) {
2370
memset(args, 0, sizeof(*args));
2371
return 0;
2372
}
2373
/* We are not peeking, and there are no dirty pages */
2374
if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2375
memset(args, 0, sizeof(*args));
2376
return 0;
2377
}
2378
2379
values = vmalloc(bufsize);
2380
if (!values)
2381
return -ENOMEM;
2382
2383
mmap_read_lock(kvm->mm);
2384
srcu_idx = srcu_read_lock(&kvm->srcu);
2385
if (peek)
2386
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2387
else
2388
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2389
srcu_read_unlock(&kvm->srcu, srcu_idx);
2390
mmap_read_unlock(kvm->mm);
2391
2392
if (kvm->arch.migration_mode)
2393
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2394
else
2395
args->remaining = 0;
2396
2397
if (copy_to_user((void __user *)args->values, values, args->count))
2398
ret = -EFAULT;
2399
2400
vfree(values);
2401
return ret;
2402
}
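/*
 * Illustrative sketch of the caller side (vm_fd is assumed to be a VM file
 * descriptor, the buffer size is arbitrary, error handling is omitted):
 * during migration userspace repeatedly pulls CMMA values until "remaining"
 * drops to zero:
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.flags = 0,
 *		.values = (__u64)buf,
 *	};
 *	do {
 *		log.count = sizeof(buf);
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 *
 * With KVM_S390_CMMA_PEEK set, the values starting at start_gfn are read
 * unconditionally and no dirty bits are consumed.
 */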
2403
2404
/*
2405
* This function sets the CMMA attributes for the given pages. If the input
2406
* buffer has zero length, no action is taken, otherwise the attributes are
2407
* set and the mm->context.uses_cmm flag is set.
2408
*/
2409
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2410
const struct kvm_s390_cmma_log *args)
2411
{
2412
unsigned long hva, mask, pgstev, i;
2413
uint8_t *bits;
2414
int srcu_idx, r = 0;
2415
2416
mask = args->mask;
2417
2418
if (!kvm->arch.use_cmma)
2419
return -ENXIO;
2420
/* invalid/unsupported flags */
2421
if (args->flags != 0)
2422
return -EINVAL;
2423
/* Enforce sane limit on memory allocation */
2424
if (args->count > KVM_S390_CMMA_SIZE_MAX)
2425
return -EINVAL;
2426
/* Nothing to do */
2427
if (args->count == 0)
2428
return 0;
2429
2430
bits = vmalloc(array_size(sizeof(*bits), args->count));
2431
if (!bits)
2432
return -ENOMEM;
2433
2434
r = copy_from_user(bits, (void __user *)args->values, args->count);
2435
if (r) {
2436
r = -EFAULT;
2437
goto out;
2438
}
2439
2440
mmap_read_lock(kvm->mm);
2441
srcu_idx = srcu_read_lock(&kvm->srcu);
2442
for (i = 0; i < args->count; i++) {
2443
hva = gfn_to_hva(kvm, args->start_gfn + i);
2444
if (kvm_is_error_hva(hva)) {
2445
r = -EFAULT;
2446
break;
2447
}
2448
2449
pgstev = bits[i];
2450
pgstev = pgstev << 24;
2451
mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2452
set_pgste_bits(kvm->mm, hva, mask, pgstev);
2453
}
2454
srcu_read_unlock(&kvm->srcu, srcu_idx);
2455
mmap_read_unlock(kvm->mm);
2456
2457
if (!kvm->mm->context.uses_cmm) {
2458
mmap_write_lock(kvm->mm);
2459
kvm->mm->context.uses_cmm = 1;
2460
mmap_write_unlock(kvm->mm);
2461
}
2462
out:
2463
vfree(bits);
2464
return r;
2465
}
2466
2467
/**
2468
* kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2469
* non protected.
2470
* @kvm: the VM whose protected vCPUs are to be converted
2471
* @rc: return value for the RC field of the UVC (in case of error)
2472
* @rrc: return value for the RRC field of the UVC (in case of error)
2473
*
2474
* Does not stop in case of error, tries to convert as many
2475
* CPUs as possible. In case of error, the RC and RRC of the first error are
2476
* returned.
2477
*
2478
* Return: 0 in case of success, otherwise -EIO
2479
*/
2480
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2481
{
2482
struct kvm_vcpu *vcpu;
2483
unsigned long i;
2484
u16 _rc, _rrc;
2485
int ret = 0;
2486
2487
/*
2488
* We ignore failures and try to destroy as many CPUs as possible.
2489
* At the same time we must not free the assigned resources when
2490
* this fails, as the ultravisor still has access to that memory.
2491
* So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2492
* behind.
2493
* We want to return the first failure rc and rrc, though.
2494
*/
2495
kvm_for_each_vcpu(i, vcpu, kvm) {
2496
mutex_lock(&vcpu->mutex);
2497
if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2498
*rc = _rc;
2499
*rrc = _rrc;
2500
ret = -EIO;
2501
}
2502
mutex_unlock(&vcpu->mutex);
2503
}
2504
/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2505
if (use_gisa)
2506
kvm_s390_gisa_enable(kvm);
2507
return ret;
2508
}
2509
2510
/**
2511
* kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2512
* to protected.
2513
* @kvm: the VM whose protected vCPUs are to be converted
2514
* @rc: return value for the RC field of the UVC (in case of error)
2515
* @rrc: return value for the RRC field of the UVC (in case of error)
2516
*
2517
* Tries to undo the conversion in case of error.
2518
*
2519
* Return: 0 in case of success, otherwise -EIO
2520
*/
2521
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2522
{
2523
unsigned long i;
2524
int r = 0;
2525
u16 dummy;
2526
2527
struct kvm_vcpu *vcpu;
2528
2529
/* Disable the GISA if the ultravisor does not support AIV. */
2530
if (!uv_has_feature(BIT_UV_FEAT_AIV))
2531
kvm_s390_gisa_disable(kvm);
2532
2533
kvm_for_each_vcpu(i, vcpu, kvm) {
2534
mutex_lock(&vcpu->mutex);
2535
r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2536
mutex_unlock(&vcpu->mutex);
2537
if (r)
2538
break;
2539
}
2540
if (r)
2541
kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2542
return r;
2543
}
2544
2545
/*
2546
* Here we provide user space with a direct interface to query UV
2547
* related data like UV maxima and available features as well as
2548
* feature specific data.
2549
*
2550
* To facilitate future extension of the data structures we'll try to
2551
* write data up to the maximum requested length.
2552
*/
2553
static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2554
{
2555
ssize_t len_min;
2556
2557
switch (info->header.id) {
2558
case KVM_PV_INFO_VM: {
2559
len_min = sizeof(info->header) + sizeof(info->vm);
2560
2561
if (info->header.len_max < len_min)
2562
return -EINVAL;
2563
2564
memcpy(info->vm.inst_calls_list,
2565
uv_info.inst_calls_list,
2566
sizeof(uv_info.inst_calls_list));
2567
2568
/* It's max cpuid not max cpus, so it's off by one */
2569
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2570
info->vm.max_guests = uv_info.max_num_sec_conf;
2571
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2572
info->vm.feature_indication = uv_info.uv_feature_indications;
2573
2574
return len_min;
2575
}
2576
case KVM_PV_INFO_DUMP: {
2577
len_min = sizeof(info->header) + sizeof(info->dump);
2578
2579
if (info->header.len_max < len_min)
2580
return -EINVAL;
2581
2582
info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2583
info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2584
info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2585
return len_min;
2586
}
2587
default:
2588
return -EINVAL;
2589
}
2590
}
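/*
 * The length handshake above: userspace fills in header.len_max with the
 * size of its buffer, the handler rejects buffers that cannot hold even the
 * fixed part of the requested reply, and on success it returns the number
 * of valid bytes, which the caller stores in header.len_written before
 * copying the result back. Future kernels can thus append fields to the
 * info structures without breaking existing userspace.
 */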
2591
2592
static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2593
struct kvm_s390_pv_dmp dmp)
2594
{
2595
int r = -EINVAL;
2596
void __user *result_buff = (void __user *)dmp.buff_addr;
2597
2598
switch (dmp.subcmd) {
2599
case KVM_PV_DUMP_INIT: {
2600
if (kvm->arch.pv.dumping)
2601
break;
2602
2603
/*
2604
* Block SIE entry as concurrent dump UVCs could lead
2605
* to validities.
2606
*/
2607
kvm_s390_vcpu_block_all(kvm);
2608
2609
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2610
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2611
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2612
cmd->rc, cmd->rrc);
2613
if (!r) {
2614
kvm->arch.pv.dumping = true;
2615
} else {
2616
kvm_s390_vcpu_unblock_all(kvm);
2617
r = -EINVAL;
2618
}
2619
break;
2620
}
2621
case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2622
if (!kvm->arch.pv.dumping)
2623
break;
2624
2625
/*
2626
* gaddr is an output parameter since we might stop
2627
* early. As dmp will be copied back in our caller, we
2628
* don't need to do it ourselves.
2629
*/
2630
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2631
&cmd->rc, &cmd->rrc);
2632
break;
2633
}
2634
case KVM_PV_DUMP_COMPLETE: {
2635
if (!kvm->arch.pv.dumping)
2636
break;
2637
2638
r = -EINVAL;
2639
if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2640
break;
2641
2642
r = kvm_s390_pv_dump_complete(kvm, result_buff,
2643
&cmd->rc, &cmd->rrc);
2644
break;
2645
}
2646
default:
2647
r = -ENOTTY;
2648
break;
2649
}
2650
2651
return r;
2652
}
2653
2654
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2655
{
2656
const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
2657
void __user *argp = (void __user *)cmd->data;
2658
int r = 0;
2659
u16 dummy;
2660
2661
if (need_lock)
2662
mutex_lock(&kvm->lock);
2663
2664
switch (cmd->cmd) {
2665
case KVM_PV_ENABLE: {
2666
r = -EINVAL;
2667
if (kvm_s390_pv_is_protected(kvm))
2668
break;
2669
2670
/*
2671
* FMT 4 SIE needs esca. As we never switch back to bsca from
2672
* esca, we need no cleanup in the error cases below
2673
*/
2674
r = sca_switch_to_extended(kvm);
2675
if (r)
2676
break;
2677
2678
mmap_write_lock(kvm->mm);
2679
r = gmap_helper_disable_cow_sharing();
2680
mmap_write_unlock(kvm->mm);
2681
if (r)
2682
break;
2683
2684
r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2685
if (r)
2686
break;
2687
2688
r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2689
if (r)
2690
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2691
2692
/* we need to block service interrupts from now on */
2693
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2694
break;
2695
}
2696
case KVM_PV_ASYNC_CLEANUP_PREPARE:
2697
r = -EINVAL;
2698
if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
2699
break;
2700
2701
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2702
/*
2703
* If a CPU could not be destroyed, destroy VM will also fail.
2704
* There is no point in trying to destroy it. Instead return
2705
* the rc and rrc from the first CPU that failed destroying.
2706
*/
2707
if (r)
2708
break;
2709
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
2710
2711
/* no need to block service interrupts any more */
2712
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2713
break;
2714
case KVM_PV_ASYNC_CLEANUP_PERFORM:
2715
r = -EINVAL;
2716
if (!async_destroy)
2717
break;
2718
/* kvm->lock must not be held; this is asserted inside the function. */
2719
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
2720
break;
2721
case KVM_PV_DISABLE: {
2722
r = -EINVAL;
2723
if (!kvm_s390_pv_is_protected(kvm))
2724
break;
2725
2726
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2727
/*
2728
* If a CPU could not be destroyed, destroy VM will also fail.
2729
* There is no point in trying to destroy it. Instead return
2730
* the rc and rrc from the first CPU that failed destroying.
2731
*/
2732
if (r)
2733
break;
2734
r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
2735
2736
/* no need to block service interrupts any more */
2737
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2738
break;
2739
}
2740
case KVM_PV_SET_SEC_PARMS: {
2741
struct kvm_s390_pv_sec_parm parms = {};
2742
void *hdr;
2743
2744
r = -EINVAL;
2745
if (!kvm_s390_pv_is_protected(kvm))
2746
break;
2747
2748
r = -EFAULT;
2749
if (copy_from_user(&parms, argp, sizeof(parms)))
2750
break;
2751
2752
/* Currently restricted to 8KB */
2753
r = -EINVAL;
2754
if (parms.length > PAGE_SIZE * 2)
2755
break;
2756
2757
r = -ENOMEM;
2758
hdr = vmalloc(parms.length);
2759
if (!hdr)
2760
break;
2761
2762
r = -EFAULT;
2763
if (!copy_from_user(hdr, (void __user *)parms.origin,
2764
parms.length))
2765
r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2766
&cmd->rc, &cmd->rrc);
2767
2768
vfree(hdr);
2769
break;
2770
}
2771
case KVM_PV_UNPACK: {
2772
struct kvm_s390_pv_unp unp = {};
2773
2774
r = -EINVAL;
2775
if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2776
break;
2777
2778
r = -EFAULT;
2779
if (copy_from_user(&unp, argp, sizeof(unp)))
2780
break;
2781
2782
r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2783
&cmd->rc, &cmd->rrc);
2784
break;
2785
}
2786
case KVM_PV_VERIFY: {
2787
r = -EINVAL;
2788
if (!kvm_s390_pv_is_protected(kvm))
2789
break;
2790
2791
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2792
UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2793
KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2794
cmd->rrc);
2795
break;
2796
}
2797
case KVM_PV_PREP_RESET: {
2798
r = -EINVAL;
2799
if (!kvm_s390_pv_is_protected(kvm))
2800
break;
2801
2802
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2803
UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2804
KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2805
cmd->rc, cmd->rrc);
2806
break;
2807
}
2808
case KVM_PV_UNSHARE_ALL: {
2809
r = -EINVAL;
2810
if (!kvm_s390_pv_is_protected(kvm))
2811
break;
2812
2813
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2814
UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2815
KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2816
cmd->rc, cmd->rrc);
2817
break;
2818
}
2819
case KVM_PV_INFO: {
2820
struct kvm_s390_pv_info info = {};
2821
ssize_t data_len;
2822
2823
/*
2824
* No need to check the VM protection here.
2825
*
2826
* Maybe user space wants to query some of the data
2827
* when the VM is still unprotected. If we see the
2828
* need to fence a new data command we can still
2829
* return an error in the info handler.
2830
*/
2831
2832
r = -EFAULT;
2833
if (copy_from_user(&info, argp, sizeof(info.header)))
2834
break;
2835
2836
r = -EINVAL;
2837
if (info.header.len_max < sizeof(info.header))
2838
break;
2839
2840
data_len = kvm_s390_handle_pv_info(&info);
2841
if (data_len < 0) {
2842
r = data_len;
2843
break;
2844
}
2845
/*
2846
* If a data command struct is extended (multiple
2847
* times) this can be used to determine how much of it
2848
* is valid.
2849
*/
2850
info.header.len_written = data_len;
2851
2852
r = -EFAULT;
2853
if (copy_to_user(argp, &info, data_len))
2854
break;
2855
2856
r = 0;
2857
break;
2858
}
2859
case KVM_PV_DUMP: {
2860
struct kvm_s390_pv_dmp dmp;
2861
2862
r = -EINVAL;
2863
if (!kvm_s390_pv_is_protected(kvm))
2864
break;
2865
2866
r = -EFAULT;
2867
if (copy_from_user(&dmp, argp, sizeof(dmp)))
2868
break;
2869
2870
r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2871
if (r)
2872
break;
2873
2874
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2875
r = -EFAULT;
2876
break;
2877
}
2878
2879
break;
2880
}
2881
default:
2882
r = -ENOTTY;
2883
}
2884
if (need_lock)
2885
mutex_unlock(&kvm->lock);
2886
2887
return r;
2888
}
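/*
 * Locking summary for the handler above: every PV command runs under
 * kvm->lock, except KVM_PV_ASYNC_CLEANUP_PERFORM, which tears down a
 * previously set-aside protected configuration and therefore must run
 * without the lock (see the need_lock check at the top and the assertion
 * inside kvm_s390_pv_deinit_aside_vm()).
 */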
2889
2890
static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
2891
{
2892
if (mop->flags & ~supported_flags || !mop->size)
2893
return -EINVAL;
2894
if (mop->size > MEM_OP_MAX_SIZE)
2895
return -E2BIG;
2896
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2897
if (mop->key > 0xf)
2898
return -EINVAL;
2899
} else {
2900
mop->key = 0;
2901
}
2902
return 0;
2903
}
2904
2905
static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2906
{
2907
void __user *uaddr = (void __user *)mop->buf;
2908
enum gacc_mode acc_mode;
2909
void *tmpbuf = NULL;
2910
int r, srcu_idx;
2911
2912
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
2913
KVM_S390_MEMOP_F_CHECK_ONLY);
2914
if (r)
2915
return r;
2916
2917
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2918
tmpbuf = vmalloc(mop->size);
2919
if (!tmpbuf)
2920
return -ENOMEM;
2921
}
2922
2923
srcu_idx = srcu_read_lock(&kvm->srcu);
2924
2925
if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2926
r = PGM_ADDRESSING;
2927
goto out_unlock;
2928
}
2929
2930
acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
2931
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2932
r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
2933
goto out_unlock;
2934
}
2935
if (acc_mode == GACC_FETCH) {
2936
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2937
mop->size, GACC_FETCH, mop->key);
2938
if (r)
2939
goto out_unlock;
2940
if (copy_to_user(uaddr, tmpbuf, mop->size))
2941
r = -EFAULT;
2942
} else {
2943
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2944
r = -EFAULT;
2945
goto out_unlock;
2946
}
2947
r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2948
mop->size, GACC_STORE, mop->key);
2949
}
2950
2951
out_unlock:
2952
srcu_read_unlock(&kvm->srcu, srcu_idx);
2953
2954
vfree(tmpbuf);
2955
return r;
2956
}
2957
2958
static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2959
{
2960
void __user *uaddr = (void __user *)mop->buf;
2961
void __user *old_addr = (void __user *)mop->old_addr;
2962
union {
2963
__uint128_t quad;
2964
char raw[sizeof(__uint128_t)];
2965
} old = { .quad = 0}, new = { .quad = 0 };
2966
unsigned int off_in_quad = sizeof(new) - mop->size;
2967
int r, srcu_idx;
2968
bool success;
2969
2970
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
2971
if (r)
2972
return r;
2973
/*
2974
* This validates off_in_quad. Checking that size is a power
2975
* of two is not necessary, as cmpxchg_guest_abs_with_key
2976
* takes care of that
2977
*/
2978
if (mop->size > sizeof(new))
2979
return -EINVAL;
2980
if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
2981
return -EFAULT;
2982
if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
2983
return -EFAULT;
2984
2985
srcu_idx = srcu_read_lock(&kvm->srcu);
2986
2987
if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
2988
r = PGM_ADDRESSING;
2989
goto out_unlock;
2990
}
2991
2992
r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
2993
new.quad, mop->key, &success);
2994
if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
2995
r = -EFAULT;
2996
2997
out_unlock:
2998
srcu_read_unlock(&kvm->srcu, srcu_idx);
2999
return r;
3000
}
3001
3002
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
3003
{
3004
/*
3005
* This is technically a heuristic only: if the kvm->lock is not
3006
* taken, it is not guaranteed that the vm is/remains non-protected.
3007
* This is ok from a kernel perspective, wrongdoing is detected
3008
* on the access, -EFAULT is returned and the vm may crash the
3009
* next time it accesses the memory in question.
3010
* There is no sane usecase to do switching and a memop on two
3011
* different CPUs at the same time.
3012
*/
3013
if (kvm_s390_pv_get_handle(kvm))
3014
return -EINVAL;
3015
3016
switch (mop->op) {
3017
case KVM_S390_MEMOP_ABSOLUTE_READ:
3018
case KVM_S390_MEMOP_ABSOLUTE_WRITE:
3019
return kvm_s390_vm_mem_op_abs(kvm, mop);
3020
case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
3021
return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
3022
default:
3023
return -EINVAL;
3024
}
3025
}
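/*
 * Illustrative sketch of a VM-scoped memop from userspace (vm_fd is assumed
 * to be a VM file descriptor, the guest address is made up, error handling
 * is omitted): read 256 bytes of guest absolute memory into a local buffer:
 *
 *	uint8_t buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * KVM_S390_MEMOP_F_CHECK_ONLY only checks accessibility without copying any
 * data, and KVM_S390_MEMOP_F_SKEY_PROTECTION makes the access honour the
 * storage key passed in "key".
 */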
3026
3027
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
3028
{
3029
struct kvm *kvm = filp->private_data;
3030
void __user *argp = (void __user *)arg;
3031
struct kvm_device_attr attr;
3032
int r;
3033
3034
switch (ioctl) {
3035
case KVM_S390_INTERRUPT: {
3036
struct kvm_s390_interrupt s390int;
3037
3038
r = -EFAULT;
3039
if (copy_from_user(&s390int, argp, sizeof(s390int)))
3040
break;
3041
r = kvm_s390_inject_vm(kvm, &s390int);
3042
break;
3043
}
3044
case KVM_CREATE_IRQCHIP: {
3045
r = -EINVAL;
3046
if (kvm->arch.use_irqchip)
3047
r = 0;
3048
break;
3049
}
3050
case KVM_SET_DEVICE_ATTR: {
3051
r = -EFAULT;
3052
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3053
break;
3054
r = kvm_s390_vm_set_attr(kvm, &attr);
3055
break;
3056
}
3057
case KVM_GET_DEVICE_ATTR: {
3058
r = -EFAULT;
3059
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3060
break;
3061
r = kvm_s390_vm_get_attr(kvm, &attr);
3062
break;
3063
}
3064
case KVM_HAS_DEVICE_ATTR: {
3065
r = -EFAULT;
3066
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3067
break;
3068
r = kvm_s390_vm_has_attr(kvm, &attr);
3069
break;
3070
}
3071
case KVM_S390_GET_SKEYS: {
3072
struct kvm_s390_skeys args;
3073
3074
r = -EFAULT;
3075
if (copy_from_user(&args, argp,
3076
sizeof(struct kvm_s390_skeys)))
3077
break;
3078
r = kvm_s390_get_skeys(kvm, &args);
3079
break;
3080
}
3081
case KVM_S390_SET_SKEYS: {
3082
struct kvm_s390_skeys args;
3083
3084
r = -EFAULT;
3085
if (copy_from_user(&args, argp,
3086
sizeof(struct kvm_s390_skeys)))
3087
break;
3088
r = kvm_s390_set_skeys(kvm, &args);
3089
break;
3090
}
3091
case KVM_S390_GET_CMMA_BITS: {
3092
struct kvm_s390_cmma_log args;
3093
3094
r = -EFAULT;
3095
if (copy_from_user(&args, argp, sizeof(args)))
3096
break;
3097
mutex_lock(&kvm->slots_lock);
3098
r = kvm_s390_get_cmma_bits(kvm, &args);
3099
mutex_unlock(&kvm->slots_lock);
3100
if (!r) {
3101
r = copy_to_user(argp, &args, sizeof(args));
3102
if (r)
3103
r = -EFAULT;
3104
}
3105
break;
3106
}
3107
case KVM_S390_SET_CMMA_BITS: {
3108
struct kvm_s390_cmma_log args;
3109
3110
r = -EFAULT;
3111
if (copy_from_user(&args, argp, sizeof(args)))
3112
break;
3113
mutex_lock(&kvm->slots_lock);
3114
r = kvm_s390_set_cmma_bits(kvm, &args);
3115
mutex_unlock(&kvm->slots_lock);
3116
break;
3117
}
3118
case KVM_S390_PV_COMMAND: {
3119
struct kvm_pv_cmd args;
3120
3121
/* protvirt means user cpu state */
3122
kvm_s390_set_user_cpu_state_ctrl(kvm);
3123
r = 0;
3124
if (!is_prot_virt_host()) {
3125
r = -EINVAL;
3126
break;
3127
}
3128
if (copy_from_user(&args, argp, sizeof(args))) {
3129
r = -EFAULT;
3130
break;
3131
}
3132
if (args.flags) {
3133
r = -EINVAL;
3134
break;
3135
}
3136
/* must be called without kvm->lock */
3137
r = kvm_s390_handle_pv(kvm, &args);
3138
if (copy_to_user(argp, &args, sizeof(args))) {
3139
r = -EFAULT;
3140
break;
3141
}
3142
break;
3143
}
3144
case KVM_S390_MEM_OP: {
3145
struct kvm_s390_mem_op mem_op;
3146
3147
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3148
r = kvm_s390_vm_mem_op(kvm, &mem_op);
3149
else
3150
r = -EFAULT;
3151
break;
3152
}
3153
case KVM_S390_ZPCI_OP: {
3154
struct kvm_s390_zpci_op args;
3155
3156
r = -EINVAL;
3157
if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3158
break;
3159
if (copy_from_user(&args, argp, sizeof(args))) {
3160
r = -EFAULT;
3161
break;
3162
}
3163
r = kvm_s390_pci_zpci_op(kvm, &args);
3164
break;
3165
}
3166
default:
3167
r = -ENOTTY;
3168
}
3169
3170
return r;
3171
}
3172
3173
static int kvm_s390_apxa_installed(void)
3174
{
3175
struct ap_config_info info;
3176
3177
if (ap_instructions_available()) {
3178
if (ap_qci(&info) == 0)
3179
return info.apxa;
3180
}
3181
3182
return 0;
3183
}
3184
3185
/*
3186
* The format of the crypto control block (CRYCB) is specified in the 3 low
3187
* order bits of the CRYCB designation (CRYCBD) field as follows:
3188
* Format 0: Neither the message security assist extension 3 (MSAX3) nor the
3189
* AP extended addressing (APXA) facility are installed.
3190
* Format 1: The APXA facility is not installed but the MSAX3 facility is.
3191
* Format 2: Both the APXA and MSAX3 facilities are installed
3192
*/
3193
static void kvm_s390_set_crycb_format(struct kvm *kvm)
3194
{
3195
kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
3196
3197
/* Clear the CRYCB format bits - i.e., set format 0 by default */
3198
kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
3199
3200
/* Check whether MSAX3 is installed */
3201
if (!test_kvm_facility(kvm, 76))
3202
return;
3203
3204
if (kvm_s390_apxa_installed())
3205
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
3206
else
3207
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
3208
}
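/*
 * The resulting format, in table form:
 *
 *	MSAX3 (facility 76)	APXA	CRYCBD format
 *	not installed		-	CRYCB_FORMAT0
 *	installed		no	CRYCB_FORMAT1
 *	installed		yes	CRYCB_FORMAT2
 */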
3209
3210
/*
3211
* kvm_arch_crypto_set_masks
3212
*
3213
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
3214
* to be set.
3215
* @apm: the mask identifying the accessible AP adapters
3216
* @aqm: the mask identifying the accessible AP domains
3217
* @adm: the mask identifying the accessible AP control domains
3218
*
3219
* Set the masks that identify the adapters, domains and control domains to
3220
* which the KVM guest is granted access.
3221
*
3222
* Note: The kvm->lock mutex must be locked by the caller before invoking this
3223
* function.
3224
*/
3225
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3226
unsigned long *aqm, unsigned long *adm)
3227
{
3228
struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3229
3230
kvm_s390_vcpu_block_all(kvm);
3231
3232
switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3233
case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3234
memcpy(crycb->apcb1.apm, apm, 32);
3235
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3236
apm[0], apm[1], apm[2], apm[3]);
3237
memcpy(crycb->apcb1.aqm, aqm, 32);
3238
VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3239
aqm[0], aqm[1], aqm[2], aqm[3]);
3240
memcpy(crycb->apcb1.adm, adm, 32);
3241
VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3242
adm[0], adm[1], adm[2], adm[3]);
3243
break;
3244
case CRYCB_FORMAT1:
3245
case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3246
memcpy(crycb->apcb0.apm, apm, 8);
3247
memcpy(crycb->apcb0.aqm, aqm, 2);
3248
memcpy(crycb->apcb0.adm, adm, 2);
3249
VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3250
apm[0], *((unsigned short *)aqm),
3251
*((unsigned short *)adm));
3252
break;
3253
default: /* Can not happen */
3254
break;
3255
}
3256
3257
/* recreate the shadow crycb for each vcpu */
3258
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3259
kvm_s390_vcpu_unblock_all(kvm);
3260
}
3261
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
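/*
 * Mask widths used above: with CRYCB_FORMAT2 the masks live in apcb1 and are
 * 256 bits (32 bytes) each, while formats 0 and 1 use apcb0, where the
 * adapter mask is 64 bits and the domain and control-domain masks are 16
 * bits each, which is why only 8, 2 and 2 bytes are copied in that case.
 */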
3262
3263
/*
3264
* kvm_arch_crypto_clear_masks
3265
*
3266
* @kvm: pointer to the target guest's KVM struct containing the crypto masks
3267
* to be cleared.
3268
*
3269
* Clear the masks that identify the adapters, domains and control domains to
3270
* which the KVM guest is granted access.
3271
*
3272
* Note: The kvm->lock mutex must be locked by the caller before invoking this
3273
* function.
3274
*/
3275
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3276
{
3277
kvm_s390_vcpu_block_all(kvm);
3278
3279
memset(&kvm->arch.crypto.crycb->apcb0, 0,
3280
sizeof(kvm->arch.crypto.crycb->apcb0));
3281
memset(&kvm->arch.crypto.crycb->apcb1, 0,
3282
sizeof(kvm->arch.crypto.crycb->apcb1));
3283
3284
VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3285
/* recreate the shadow crycb for each vcpu */
3286
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3287
kvm_s390_vcpu_unblock_all(kvm);
3288
}
3289
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3290
3291
static u64 kvm_s390_get_initial_cpuid(void)
3292
{
3293
struct cpuid cpuid;
3294
3295
get_cpu_id(&cpuid);
3296
cpuid.version = 0xff;
3297
return *((u64 *) &cpuid);
3298
}
3299
3300
static void kvm_s390_crypto_init(struct kvm *kvm)
3301
{
3302
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3303
kvm_s390_set_crycb_format(kvm);
3304
init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3305
3306
if (!test_kvm_facility(kvm, 76))
3307
return;
3308
3309
/* Enable AES/DEA protected key functions by default */
3310
kvm->arch.crypto.aes_kw = 1;
3311
kvm->arch.crypto.dea_kw = 1;
3312
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3313
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3314
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3315
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3316
}
3317
3318
static void sca_dispose(struct kvm *kvm)
3319
{
3320
if (kvm->arch.use_esca)
3321
free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3322
else
3323
free_page((unsigned long)(kvm->arch.sca));
3324
kvm->arch.sca = NULL;
3325
}
3326
3327
void kvm_arch_free_vm(struct kvm *kvm)
3328
{
3329
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3330
kvm_s390_pci_clear_list(kvm);
3331
3332
__kvm_arch_free_vm(kvm);
3333
}
3334
3335
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3336
{
3337
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3338
int i, rc;
3339
char debug_name[16];
3340
static unsigned long sca_offset;
3341
3342
rc = -EINVAL;
3343
#ifdef CONFIG_KVM_S390_UCONTROL
3344
if (type & ~KVM_VM_S390_UCONTROL)
3345
goto out_err;
3346
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3347
goto out_err;
3348
#else
3349
if (type)
3350
goto out_err;
3351
#endif
3352
3353
rc = s390_enable_sie();
3354
if (rc)
3355
goto out_err;
3356
3357
rc = -ENOMEM;
3358
3359
if (!sclp.has_64bscao)
3360
alloc_flags |= GFP_DMA;
3361
rwlock_init(&kvm->arch.sca_lock);
3362
/* start with basic SCA */
3363
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3364
if (!kvm->arch.sca)
3365
goto out_err;
3366
mutex_lock(&kvm_lock);
3367
sca_offset += 16;
3368
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3369
sca_offset = 0;
3370
kvm->arch.sca = (struct bsca_block *)
3371
((char *) kvm->arch.sca + sca_offset);
3372
mutex_unlock(&kvm_lock);
3373
3374
sprintf(debug_name, "kvm-%u", current->pid);
3375
3376
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3377
if (!kvm->arch.dbf)
3378
goto out_err;
3379
3380
BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3381
kvm->arch.sie_page2 =
3382
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3383
if (!kvm->arch.sie_page2)
3384
goto out_err;
3385
3386
kvm->arch.sie_page2->kvm = kvm;
3387
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3388
3389
for (i = 0; i < kvm_s390_fac_size(); i++) {
3390
kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3391
(kvm_s390_fac_base[i] |
3392
kvm_s390_fac_ext[i]);
3393
kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3394
kvm_s390_fac_base[i];
3395
}
3396
kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3397
3398
/* we are always in czam mode - even on pre z14 machines */
3399
set_kvm_facility(kvm->arch.model.fac_mask, 138);
3400
set_kvm_facility(kvm->arch.model.fac_list, 138);
3401
/* we emulate STHYI in kvm */
3402
set_kvm_facility(kvm->arch.model.fac_mask, 74);
3403
set_kvm_facility(kvm->arch.model.fac_list, 74);
3404
if (machine_has_tlb_guest()) {
3405
set_kvm_facility(kvm->arch.model.fac_mask, 147);
3406
set_kvm_facility(kvm->arch.model.fac_list, 147);
3407
}
3408
3409
if (css_general_characteristics.aiv && test_facility(65))
3410
set_kvm_facility(kvm->arch.model.fac_mask, 65);
3411
3412
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3413
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3414
3415
kvm->arch.model.uv_feat_guest.feat = 0;
3416
3417
kvm_s390_crypto_init(kvm);
3418
3419
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3420
mutex_lock(&kvm->lock);
3421
kvm_s390_pci_init_list(kvm);
3422
kvm_s390_vcpu_pci_enable_interp(kvm);
3423
mutex_unlock(&kvm->lock);
3424
}
3425
3426
mutex_init(&kvm->arch.float_int.ais_lock);
3427
spin_lock_init(&kvm->arch.float_int.lock);
3428
for (i = 0; i < FIRQ_LIST_COUNT; i++)
3429
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3430
init_waitqueue_head(&kvm->arch.ipte_wq);
3431
mutex_init(&kvm->arch.ipte_mutex);
3432
3433
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3434
VM_EVENT(kvm, 3, "vm created with type %lu", type);
3435
3436
if (type & KVM_VM_S390_UCONTROL) {
3437
struct kvm_userspace_memory_region2 fake_memslot = {
3438
.slot = KVM_S390_UCONTROL_MEMSLOT,
3439
.guest_phys_addr = 0,
3440
.userspace_addr = 0,
3441
.memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE),
3442
.flags = 0,
3443
};
3444
3445
kvm->arch.gmap = NULL;
3446
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3447
/* one flat fake memslot covering the whole address-space */
3448
mutex_lock(&kvm->slots_lock);
3449
KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm);
3450
mutex_unlock(&kvm->slots_lock);
3451
} else {
3452
if (sclp.hamax == U64_MAX)
3453
kvm->arch.mem_limit = TASK_SIZE_MAX;
3454
else
3455
kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3456
sclp.hamax + 1);
3457
kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3458
if (!kvm->arch.gmap)
3459
goto out_err;
3460
kvm->arch.gmap->private = kvm;
3461
kvm->arch.gmap->pfault_enabled = 0;
3462
}
3463
3464
kvm->arch.use_pfmfi = sclp.has_pfmfi;
3465
kvm->arch.use_skf = sclp.has_skey;
3466
spin_lock_init(&kvm->arch.start_stop_lock);
3467
kvm_s390_vsie_init(kvm);
3468
if (use_gisa)
3469
kvm_s390_gisa_init(kvm);
3470
INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
3471
kvm->arch.pv.set_aside = NULL;
3472
KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
3473
3474
return 0;
3475
out_err:
3476
free_page((unsigned long)kvm->arch.sie_page2);
3477
debug_unregister(kvm->arch.dbf);
3478
sca_dispose(kvm);
3479
KVM_EVENT(3, "creation of vm failed: %d", rc);
3480
return rc;
3481
}
3482
3483
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3484
{
3485
u16 rc, rrc;
3486
3487
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3488
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3489
kvm_s390_clear_local_irqs(vcpu);
3490
kvm_clear_async_pf_completion_queue(vcpu);
3491
if (!kvm_is_ucontrol(vcpu->kvm))
3492
sca_del_vcpu(vcpu);
3493
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3494
3495
if (kvm_is_ucontrol(vcpu->kvm))
3496
gmap_remove(vcpu->arch.gmap);
3497
3498
if (vcpu->kvm->arch.use_cmma)
3499
kvm_s390_vcpu_unsetup_cmma(vcpu);
3500
/* We cannot hold the vcpu mutex here; we are already dying */
3501
if (kvm_s390_pv_cpu_get_handle(vcpu))
3502
kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3503
free_page((unsigned long)(vcpu->arch.sie_block));
3504
}
3505
3506
void kvm_arch_destroy_vm(struct kvm *kvm)
3507
{
3508
u16 rc, rrc;
3509
3510
kvm_destroy_vcpus(kvm);
3511
sca_dispose(kvm);
3512
kvm_s390_gisa_destroy(kvm);
3513
/*
3514
* We are already at the end of life and kvm->lock is not taken.
3515
* This is ok as the file descriptor is closed by now and nobody
3516
* can mess with the pv state.
3517
*/
3518
kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
3519
/*
3520
* Remove the mmu notifier only when the whole KVM VM is torn down,
3521
* and only if one was registered to begin with. If the VM is
3522
* currently not protected, but has previously been protected,
3523
* then it's possible that the notifier is still registered.
3524
*/
3525
if (kvm->arch.pv.mmu_notifier.ops)
3526
mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3527
3528
debug_unregister(kvm->arch.dbf);
3529
free_page((unsigned long)kvm->arch.sie_page2);
3530
if (!kvm_is_ucontrol(kvm))
3531
gmap_remove(kvm->arch.gmap);
3532
kvm_s390_destroy_adapters(kvm);
3533
kvm_s390_clear_float_irqs(kvm);
3534
kvm_s390_vsie_destroy(kvm);
3535
KVM_EVENT(3, "vm 0x%p destroyed", kvm);
3536
}
3537
3538
/* Section: vcpu related */
3539
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3540
{
3541
vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3542
if (!vcpu->arch.gmap)
3543
return -ENOMEM;
3544
vcpu->arch.gmap->private = vcpu->kvm;
3545
3546
return 0;
3547
}
3548
3549
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3550
{
3551
if (!kvm_s390_use_sca_entries())
3552
return;
3553
read_lock(&vcpu->kvm->arch.sca_lock);
3554
if (vcpu->kvm->arch.use_esca) {
3555
struct esca_block *sca = vcpu->kvm->arch.sca;
3556
3557
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3558
sca->cpu[vcpu->vcpu_id].sda = 0;
3559
} else {
3560
struct bsca_block *sca = vcpu->kvm->arch.sca;
3561
3562
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3563
sca->cpu[vcpu->vcpu_id].sda = 0;
3564
}
3565
read_unlock(&vcpu->kvm->arch.sca_lock);
3566
}
3567
3568
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3569
{
3570
if (!kvm_s390_use_sca_entries()) {
3571
phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
3572
3573
/* we still need the basic sca for the ipte control */
3574
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3575
vcpu->arch.sie_block->scaol = sca_phys;
3576
return;
3577
}
3578
read_lock(&vcpu->kvm->arch.sca_lock);
3579
if (vcpu->kvm->arch.use_esca) {
3580
struct esca_block *sca = vcpu->kvm->arch.sca;
3581
phys_addr_t sca_phys = virt_to_phys(sca);
3582
3583
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3584
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3585
vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
3586
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3587
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3588
} else {
3589
struct bsca_block *sca = vcpu->kvm->arch.sca;
3590
phys_addr_t sca_phys = virt_to_phys(sca);
3591
3592
sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
3593
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
3594
vcpu->arch.sie_block->scaol = sca_phys;
3595
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3596
}
3597
read_unlock(&vcpu->kvm->arch.sca_lock);
3598
}
3599
3600
/* Basic SCA to Extended SCA data copy routines */
3601
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3602
{
3603
d->sda = s->sda;
3604
d->sigp_ctrl.c = s->sigp_ctrl.c;
3605
d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3606
}
3607
3608
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3609
{
3610
int i;
3611
3612
d->ipte_control = s->ipte_control;
3613
d->mcn[0] = s->mcn;
3614
for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3615
sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3616
}
3617
3618
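/*
 * Replace the basic SCA by an extended SCA so that more than
 * KVM_S390_BSCA_CPU_SLOTS vCPUs can be added. All vCPUs are blocked
 * while their SIE blocks are rewired to the new SCA origin.
 */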
static int sca_switch_to_extended(struct kvm *kvm)
3619
{
3620
struct bsca_block *old_sca = kvm->arch.sca;
3621
struct esca_block *new_sca;
3622
struct kvm_vcpu *vcpu;
3623
unsigned long vcpu_idx;
3624
u32 scaol, scaoh;
3625
phys_addr_t new_sca_phys;
3626
3627
if (kvm->arch.use_esca)
3628
return 0;
3629
3630
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3631
if (!new_sca)
3632
return -ENOMEM;
3633
3634
new_sca_phys = virt_to_phys(new_sca);
3635
scaoh = new_sca_phys >> 32;
3636
scaol = new_sca_phys & ESCA_SCAOL_MASK;
3637
3638
kvm_s390_vcpu_block_all(kvm);
3639
write_lock(&kvm->arch.sca_lock);
3640
3641
sca_copy_b_to_e(new_sca, old_sca);
3642
3643
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3644
vcpu->arch.sie_block->scaoh = scaoh;
3645
vcpu->arch.sie_block->scaol = scaol;
3646
vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3647
}
3648
kvm->arch.sca = new_sca;
3649
kvm->arch.use_esca = 1;
3650
3651
write_unlock(&kvm->arch.sca_lock);
3652
kvm_s390_vcpu_unblock_all(kvm);
3653
3654
free_page((unsigned long)old_sca);
3655
3656
VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
3657
old_sca, kvm->arch.sca);
3658
return 0;
3659
}
3660
3661
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3662
{
3663
int rc;
3664
3665
if (!kvm_s390_use_sca_entries()) {
3666
if (id < KVM_MAX_VCPUS)
3667
return true;
3668
return false;
3669
}
3670
if (id < KVM_S390_BSCA_CPU_SLOTS)
3671
return true;
3672
if (!sclp.has_esca || !sclp.has_64bscao)
3673
return false;
3674
3675
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3676
3677
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3678
}
3679
3680
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3681
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3682
{
3683
WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3684
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3685
vcpu->arch.cputm_start = get_tod_clock_fast();
3686
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3687
}
3688
3689
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3690
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3691
{
3692
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3693
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3694
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3695
vcpu->arch.cputm_start = 0;
3696
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3697
}
3698
3699
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3700
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3701
{
3702
WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3703
vcpu->arch.cputm_enabled = true;
3704
__start_cpu_timer_accounting(vcpu);
3705
}
3706
3707
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3708
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3709
{
3710
WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3711
__stop_cpu_timer_accounting(vcpu);
3712
vcpu->arch.cputm_enabled = false;
3713
}
3714
3715
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3716
{
3717
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3718
__enable_cpu_timer_accounting(vcpu);
3719
preempt_enable();
3720
}
3721
3722
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3723
{
3724
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3725
__disable_cpu_timer_accounting(vcpu);
3726
preempt_enable();
3727
}
3728
3729
/* set the cpu timer - may only be called from the VCPU thread itself */
3730
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3731
{
3732
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3733
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3734
if (vcpu->arch.cputm_enabled)
3735
vcpu->arch.cputm_start = get_tod_clock_fast();
3736
vcpu->arch.sie_block->cputm = cputm;
3737
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3738
preempt_enable();
3739
}
3740
3741
/* update and get the cpu timer - can also be called from other VCPU threads */
3742
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3743
{
3744
unsigned int seq;
3745
__u64 value;
3746
3747
if (unlikely(!vcpu->arch.cputm_enabled))
3748
return vcpu->arch.sie_block->cputm;
3749
3750
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3751
do {
3752
seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3753
/*
3754
* If the writer would ever execute a read in the critical
3755
* section, e.g. in irq context, we have a deadlock.
3756
*/
3757
WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3758
value = vcpu->arch.sie_block->cputm;
3759
/* if cputm_start is 0, accounting is being started/stopped */
3760
if (likely(vcpu->arch.cputm_start))
3761
value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3762
} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3763
preempt_enable();
3764
return value;
3765
}
3766
3767
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3768
{
3769
3770
kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3771
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3772
__start_cpu_timer_accounting(vcpu);
3773
vcpu->cpu = cpu;
3774
}
3775
3776
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3777
{
3778
vcpu->cpu = -1;
3779
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3780
__stop_cpu_timer_accounting(vcpu);
3781
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3782
3783
}
3784
3785
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3786
{
3787
mutex_lock(&vcpu->kvm->lock);
3788
preempt_disable();
3789
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3790
vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3791
preempt_enable();
3792
mutex_unlock(&vcpu->kvm->lock);
3793
if (!kvm_is_ucontrol(vcpu->kvm)) {
3794
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3795
sca_add_vcpu(vcpu);
3796
}
3797
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3798
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3799
}
3800
3801
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3802
{
3803
if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3804
test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3805
return true;
3806
return false;
3807
}
3808
3809
static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3810
{
3811
/* At least one ECC subfunction must be present */
3812
return kvm_has_pckmo_subfunc(kvm, 32) ||
3813
kvm_has_pckmo_subfunc(kvm, 33) ||
3814
kvm_has_pckmo_subfunc(kvm, 34) ||
3815
kvm_has_pckmo_subfunc(kvm, 40) ||
3816
kvm_has_pckmo_subfunc(kvm, 41);
3817
3818
}
3819
3820
static bool kvm_has_pckmo_hmac(struct kvm *kvm)
3821
{
3822
/* At least one HMAC subfunction must be present */
3823
return kvm_has_pckmo_subfunc(kvm, 118) ||
3824
kvm_has_pckmo_subfunc(kvm, 122);
3825
}
3826
3827
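/*
 * Mirror the VM-wide crypto configuration into this vCPU's SIE block:
 * set the crypto control block descriptor, enable AP instruction
 * interpretation (ECA_APIE) if requested, and select the AES/DEA
 * protected-key wrapping and ECC/HMAC bits that were configured.
 */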
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3828
{
3829
/*
3830
* If the AP instructions are not being interpreted and the MSAX3
3831
* facility is not configured for the guest, there is nothing to set up.
3832
*/
3833
if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3834
return;
3835
3836
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3837
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3838
vcpu->arch.sie_block->eca &= ~ECA_APIE;
3839
vcpu->arch.sie_block->ecd &= ~(ECD_ECC | ECD_HMAC);
3840
3841
if (vcpu->kvm->arch.crypto.apie)
3842
vcpu->arch.sie_block->eca |= ECA_APIE;
3843
3844
/* Set up protected key support */
3845
if (vcpu->kvm->arch.crypto.aes_kw) {
3846
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3847
/* ecc/hmac is also wrapped with AES key */
3848
if (kvm_has_pckmo_ecc(vcpu->kvm))
3849
vcpu->arch.sie_block->ecd |= ECD_ECC;
3850
if (kvm_has_pckmo_hmac(vcpu->kvm))
3851
vcpu->arch.sie_block->ecd |= ECD_HMAC;
3852
}
3853
3854
if (vcpu->kvm->arch.crypto.dea_kw)
3855
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3856
}
3857
3858
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3859
{
3860
free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
3861
vcpu->arch.sie_block->cbrlo = 0;
3862
}
3863
3864
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3865
{
3866
void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
3867
3868
if (!cbrlo_page)
3869
return -ENOMEM;
3870
3871
vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
3872
return 0;
3873
}
3874
3875
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3876
{
3877
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3878
3879
vcpu->arch.sie_block->ibc = model->ibc;
3880
if (test_kvm_facility(vcpu->kvm, 7))
3881
vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
3882
}
3883
3884
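/*
 * Configure the SIE control block of a freshly created vCPU: initial
 * CPU flags, the execution-control bits derived from guest facilities
 * and SCLP features, CMMA state, the clock-comparator timer and, for a
 * protected VM, the corresponding ultravisor CPU.
 */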
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3885
{
3886
int rc = 0;
3887
u16 uvrc, uvrrc;
3888
3889
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3890
CPUSTAT_SM |
3891
CPUSTAT_STOPPED);
3892
3893
if (test_kvm_facility(vcpu->kvm, 78))
3894
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3895
else if (test_kvm_facility(vcpu->kvm, 8))
3896
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3897
3898
kvm_s390_vcpu_setup_model(vcpu);
3899
3900
/* pgste_set_pte has special handling for !machine_has_esop() */
3901
if (machine_has_esop())
3902
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3903
if (test_kvm_facility(vcpu->kvm, 9))
3904
vcpu->arch.sie_block->ecb |= ECB_SRSI;
3905
if (test_kvm_facility(vcpu->kvm, 11))
3906
vcpu->arch.sie_block->ecb |= ECB_PTF;
3907
if (test_kvm_facility(vcpu->kvm, 73))
3908
vcpu->arch.sie_block->ecb |= ECB_TE;
3909
if (!kvm_is_ucontrol(vcpu->kvm))
3910
vcpu->arch.sie_block->ecb |= ECB_SPECI;
3911
3912
if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3913
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3914
if (test_kvm_facility(vcpu->kvm, 130))
3915
vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3916
vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3917
if (sclp.has_cei)
3918
vcpu->arch.sie_block->eca |= ECA_CEI;
3919
if (sclp.has_ib)
3920
vcpu->arch.sie_block->eca |= ECA_IB;
3921
if (sclp.has_siif)
3922
vcpu->arch.sie_block->eca |= ECA_SII;
3923
if (sclp.has_sigpif)
3924
vcpu->arch.sie_block->eca |= ECA_SIGPI;
3925
if (test_kvm_facility(vcpu->kvm, 129)) {
3926
vcpu->arch.sie_block->eca |= ECA_VX;
3927
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3928
}
3929
if (test_kvm_facility(vcpu->kvm, 139))
3930
vcpu->arch.sie_block->ecd |= ECD_MEF;
3931
if (test_kvm_facility(vcpu->kvm, 156))
3932
vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3933
if (vcpu->arch.sie_block->gd) {
3934
vcpu->arch.sie_block->eca |= ECA_AIV;
3935
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3936
vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3937
}
3938
vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
3939
vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
3940
3941
if (sclp.has_kss)
3942
kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3943
else
3944
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3945
3946
if (vcpu->kvm->arch.use_cmma) {
3947
rc = kvm_s390_vcpu_setup_cmma(vcpu);
3948
if (rc)
3949
return rc;
3950
}
3951
hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC,
3952
HRTIMER_MODE_REL);
3953
3954
vcpu->arch.sie_block->hpid = HPID_KVM;
3955
3956
kvm_s390_vcpu_crypto_setup(vcpu);
3957
3958
kvm_s390_vcpu_pci_setup(vcpu);
3959
3960
mutex_lock(&vcpu->kvm->lock);
3961
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3962
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3963
if (rc)
3964
kvm_s390_vcpu_unsetup_cmma(vcpu);
3965
}
3966
mutex_unlock(&vcpu->kvm->lock);
3967
3968
return rc;
3969
}
3970
3971
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3972
{
3973
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3974
return -EINVAL;
3975
return 0;
3976
}
3977
3978
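/*
 * Allocate and initialize the SIE control block of a new vCPU and
 * advertise which register sets can be synchronized through kvm_run,
 * depending on the facilities available to the guest.
 */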
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3979
{
3980
struct sie_page *sie_page;
3981
int rc;
3982
3983
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3984
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3985
if (!sie_page)
3986
return -ENOMEM;
3987
3988
vcpu->arch.sie_block = &sie_page->sie_block;
3989
vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
3990
3991
/* the real guest size will always be smaller than msl */
3992
vcpu->arch.sie_block->mso = 0;
3993
vcpu->arch.sie_block->msl = sclp.hamax;
3994
3995
vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3996
spin_lock_init(&vcpu->arch.local_int.lock);
3997
vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3998
seqcount_init(&vcpu->arch.cputm_seqcount);
3999
4000
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4001
kvm_clear_async_pf_completion_queue(vcpu);
4002
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
4003
KVM_SYNC_GPRS |
4004
KVM_SYNC_ACRS |
4005
KVM_SYNC_CRS |
4006
KVM_SYNC_ARCH0 |
4007
KVM_SYNC_PFAULT |
4008
KVM_SYNC_DIAG318;
4009
vcpu->arch.acrs_loaded = false;
4010
kvm_s390_set_prefix(vcpu, 0);
4011
if (test_kvm_facility(vcpu->kvm, 64))
4012
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
4013
if (test_kvm_facility(vcpu->kvm, 82))
4014
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
4015
if (test_kvm_facility(vcpu->kvm, 133))
4016
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
4017
if (test_kvm_facility(vcpu->kvm, 156))
4018
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
4019
/* fprs can be synchronized via vrs, even if the guest has no vx. With
4020
* cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
4021
*/
4022
if (cpu_has_vx())
4023
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
4024
else
4025
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
4026
4027
if (kvm_is_ucontrol(vcpu->kvm)) {
4028
rc = __kvm_ucontrol_vcpu_init(vcpu);
4029
if (rc)
4030
goto out_free_sie_block;
4031
}
4032
4033
VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
4034
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4035
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
4036
4037
rc = kvm_s390_vcpu_setup(vcpu);
4038
if (rc)
4039
goto out_ucontrol_uninit;
4040
4041
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
4042
return 0;
4043
4044
out_ucontrol_uninit:
4045
if (kvm_is_ucontrol(vcpu->kvm))
4046
gmap_remove(vcpu->arch.gmap);
4047
out_free_sie_block:
4048
free_page((unsigned long)(vcpu->arch.sie_block));
4049
return rc;
4050
}
4051
4052
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4053
{
4054
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4055
return kvm_s390_vcpu_has_irq(vcpu, 0);
4056
}
4057
4058
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
4059
{
4060
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
4061
}
4062
4063
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
4064
{
4065
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4066
exit_sie(vcpu);
4067
}
4068
4069
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
4070
{
4071
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
4072
}
4073
4074
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
4075
{
4076
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4077
exit_sie(vcpu);
4078
}
4079
4080
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
4081
{
4082
return atomic_read(&vcpu->arch.sie_block->prog20) &
4083
(PROG_BLOCK_SIE | PROG_REQUEST);
4084
}
4085
4086
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
4087
{
4088
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
4089
}
4090
4091
/*
4092
* Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
4093
* If the CPU is not running (e.g. waiting as idle) the function will
4094
* return immediately. */
4095
void exit_sie(struct kvm_vcpu *vcpu)
4096
{
4097
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
4098
kvm_s390_vsie_kick(vcpu);
4099
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
4100
cpu_relax();
4101
}
4102
4103
/* Kick a guest cpu out of SIE to process a request synchronously */
4104
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
4105
{
4106
__kvm_make_request(req, vcpu);
4107
kvm_s390_vcpu_request(vcpu);
4108
}
4109
4110
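/*
 * gmap invalidation callback: if the invalidated range overlaps a
 * vCPU's prefix area (two pages), request that vCPU to re-map and
 * re-protect its prefix before the next SIE entry.
 */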
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
4111
unsigned long end)
4112
{
4113
struct kvm *kvm = gmap->private;
4114
struct kvm_vcpu *vcpu;
4115
unsigned long prefix;
4116
unsigned long i;
4117
4118
trace_kvm_s390_gmap_notifier(start, end, gmap_is_shadow(gmap));
4119
4120
if (gmap_is_shadow(gmap))
4121
return;
4122
if (start >= 1UL << 31)
4123
/* We are only interested in prefix pages */
4124
return;
4125
kvm_for_each_vcpu(i, vcpu, kvm) {
4126
/* match against both prefix pages */
4127
prefix = kvm_s390_get_prefix(vcpu);
4128
if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
4129
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
4130
start, end);
4131
kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4132
}
4133
}
4134
}
4135
4136
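/*
 * Skip halt polling when this CPU's average steal time exceeds
 * halt_poll_max_steal percent of the elapsed time; the host is already
 * overcommitted in that case.
 */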
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
4137
{
4138
/* do not poll with more than halt_poll_max_steal percent of steal time */
4139
if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
4140
READ_ONCE(halt_poll_max_steal)) {
4141
vcpu->stat.halt_no_poll_steal++;
4142
return true;
4143
}
4144
return false;
4145
}
4146
4147
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
4148
{
4149
/* kvm common code refers to this, but never calls it */
4150
BUG();
4151
return 0;
4152
}
4153
4154
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
4155
struct kvm_one_reg *reg)
4156
{
4157
int r = -EINVAL;
4158
4159
switch (reg->id) {
4160
case KVM_REG_S390_TODPR:
4161
r = put_user(vcpu->arch.sie_block->todpr,
4162
(u32 __user *)reg->addr);
4163
break;
4164
case KVM_REG_S390_EPOCHDIFF:
4165
r = put_user(vcpu->arch.sie_block->epoch,
4166
(u64 __user *)reg->addr);
4167
break;
4168
case KVM_REG_S390_CPU_TIMER:
4169
r = put_user(kvm_s390_get_cpu_timer(vcpu),
4170
(u64 __user *)reg->addr);
4171
break;
4172
case KVM_REG_S390_CLOCK_COMP:
4173
r = put_user(vcpu->arch.sie_block->ckc,
4174
(u64 __user *)reg->addr);
4175
break;
4176
case KVM_REG_S390_PFTOKEN:
4177
r = put_user(vcpu->arch.pfault_token,
4178
(u64 __user *)reg->addr);
4179
break;
4180
case KVM_REG_S390_PFCOMPARE:
4181
r = put_user(vcpu->arch.pfault_compare,
4182
(u64 __user *)reg->addr);
4183
break;
4184
case KVM_REG_S390_PFSELECT:
4185
r = put_user(vcpu->arch.pfault_select,
4186
(u64 __user *)reg->addr);
4187
break;
4188
case KVM_REG_S390_PP:
4189
r = put_user(vcpu->arch.sie_block->pp,
4190
(u64 __user *)reg->addr);
4191
break;
4192
case KVM_REG_S390_GBEA:
4193
r = put_user(vcpu->arch.sie_block->gbea,
4194
(u64 __user *)reg->addr);
4195
break;
4196
default:
4197
break;
4198
}
4199
4200
return r;
4201
}
4202
4203
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
4204
struct kvm_one_reg *reg)
4205
{
4206
int r = -EINVAL;
4207
__u64 val;
4208
4209
switch (reg->id) {
4210
case KVM_REG_S390_TODPR:
4211
r = get_user(vcpu->arch.sie_block->todpr,
4212
(u32 __user *)reg->addr);
4213
break;
4214
case KVM_REG_S390_EPOCHDIFF:
4215
r = get_user(vcpu->arch.sie_block->epoch,
4216
(u64 __user *)reg->addr);
4217
break;
4218
case KVM_REG_S390_CPU_TIMER:
4219
r = get_user(val, (u64 __user *)reg->addr);
4220
if (!r)
4221
kvm_s390_set_cpu_timer(vcpu, val);
4222
break;
4223
case KVM_REG_S390_CLOCK_COMP:
4224
r = get_user(vcpu->arch.sie_block->ckc,
4225
(u64 __user *)reg->addr);
4226
break;
4227
case KVM_REG_S390_PFTOKEN:
4228
r = get_user(vcpu->arch.pfault_token,
4229
(u64 __user *)reg->addr);
4230
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4231
kvm_clear_async_pf_completion_queue(vcpu);
4232
break;
4233
case KVM_REG_S390_PFCOMPARE:
4234
r = get_user(vcpu->arch.pfault_compare,
4235
(u64 __user *)reg->addr);
4236
break;
4237
case KVM_REG_S390_PFSELECT:
4238
r = get_user(vcpu->arch.pfault_select,
4239
(u64 __user *)reg->addr);
4240
break;
4241
case KVM_REG_S390_PP:
4242
r = get_user(vcpu->arch.sie_block->pp,
4243
(u64 __user *)reg->addr);
4244
break;
4245
case KVM_REG_S390_GBEA:
4246
r = get_user(vcpu->arch.sie_block->gbea,
4247
(u64 __user *)reg->addr);
4248
break;
4249
default:
4250
break;
4251
}
4252
4253
return r;
4254
}
4255
4256
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4257
{
4258
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4259
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4260
memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4261
4262
kvm_clear_async_pf_completion_queue(vcpu);
4263
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4264
kvm_s390_vcpu_stop(vcpu);
4265
kvm_s390_clear_local_irqs(vcpu);
4266
}
4267
4268
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4269
{
4270
/* Initial reset is a superset of the normal reset */
4271
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4272
4273
/*
4274
* This equals the initial cpu reset in the POP, but we don't switch to ESA.
4275
* We not only reset the internal data, but also ...
4276
*/
4277
vcpu->arch.sie_block->gpsw.mask = 0;
4278
vcpu->arch.sie_block->gpsw.addr = 0;
4279
kvm_s390_set_prefix(vcpu, 0);
4280
kvm_s390_set_cpu_timer(vcpu, 0);
4281
vcpu->arch.sie_block->ckc = 0;
4282
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4283
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4284
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4285
4286
/* ... the data in sync regs */
4287
memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4288
vcpu->run->s.regs.ckc = 0;
4289
vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4290
vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4291
vcpu->run->psw_addr = 0;
4292
vcpu->run->psw_mask = 0;
4293
vcpu->run->s.regs.todpr = 0;
4294
vcpu->run->s.regs.cputm = 0;
4295
vcpu->run->s.regs.ckc = 0;
4296
vcpu->run->s.regs.pp = 0;
4297
vcpu->run->s.regs.gbea = 1;
4298
vcpu->run->s.regs.fpc = 0;
4299
/*
4300
* Do not reset these registers in the protected case, as some of
4301
* them are overlaid and they are not accessible in this case
4302
* anyway.
4303
*/
4304
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4305
vcpu->arch.sie_block->gbea = 1;
4306
vcpu->arch.sie_block->pp = 0;
4307
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4308
vcpu->arch.sie_block->todpr = 0;
4309
}
4310
}
4311
4312
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4313
{
4314
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4315
4316
/* Clear reset is a superset of the initial reset */
4317
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4318
4319
memset(&regs->gprs, 0, sizeof(regs->gprs));
4320
memset(&regs->vrs, 0, sizeof(regs->vrs));
4321
memset(&regs->acrs, 0, sizeof(regs->acrs));
4322
memset(&regs->gscb, 0, sizeof(regs->gscb));
4323
4324
regs->etoken = 0;
4325
regs->etoken_extension = 0;
4326
}
4327
4328
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4329
{
4330
vcpu_load(vcpu);
4331
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4332
vcpu_put(vcpu);
4333
return 0;
4334
}
4335
4336
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4337
{
4338
vcpu_load(vcpu);
4339
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4340
vcpu_put(vcpu);
4341
return 0;
4342
}
4343
4344
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4345
struct kvm_sregs *sregs)
4346
{
4347
vcpu_load(vcpu);
4348
4349
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4350
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4351
4352
vcpu_put(vcpu);
4353
return 0;
4354
}
4355
4356
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4357
struct kvm_sregs *sregs)
4358
{
4359
vcpu_load(vcpu);
4360
4361
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4362
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4363
4364
vcpu_put(vcpu);
4365
return 0;
4366
}
4367
4368
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4369
{
4370
int ret = 0;
4371
4372
vcpu_load(vcpu);
4373
4374
vcpu->run->s.regs.fpc = fpu->fpc;
4375
if (cpu_has_vx())
4376
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4377
(freg_t *) fpu->fprs);
4378
else
4379
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4380
4381
vcpu_put(vcpu);
4382
return ret;
4383
}
4384
4385
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4386
{
4387
vcpu_load(vcpu);
4388
4389
if (cpu_has_vx())
4390
convert_vx_to_fp((freg_t *) fpu->fprs,
4391
(__vector128 *) vcpu->run->s.regs.vrs);
4392
else
4393
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4394
fpu->fpc = vcpu->run->s.regs.fpc;
4395
4396
vcpu_put(vcpu);
4397
return 0;
4398
}
4399
4400
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4401
{
4402
int rc = 0;
4403
4404
if (!is_vcpu_stopped(vcpu))
4405
rc = -EBUSY;
4406
else {
4407
vcpu->run->psw_mask = psw.mask;
4408
vcpu->run->psw_addr = psw.addr;
4409
}
4410
return rc;
4411
}
4412
4413
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4414
struct kvm_translation *tr)
4415
{
4416
return -EINVAL; /* not implemented yet */
4417
}
4418
4419
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4420
KVM_GUESTDBG_USE_HW_BP | \
4421
KVM_GUESTDBG_ENABLE)
4422
4423
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4424
struct kvm_guest_debug *dbg)
4425
{
4426
int rc = 0;
4427
4428
vcpu_load(vcpu);
4429
4430
vcpu->guest_debug = 0;
4431
kvm_s390_clear_bp_data(vcpu);
4432
4433
if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4434
rc = -EINVAL;
4435
goto out;
4436
}
4437
if (!sclp.has_gpere) {
4438
rc = -EINVAL;
4439
goto out;
4440
}
4441
4442
if (dbg->control & KVM_GUESTDBG_ENABLE) {
4443
vcpu->guest_debug = dbg->control;
4444
/* enforce guest PER */
4445
kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4446
4447
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4448
rc = kvm_s390_import_bp_data(vcpu, dbg);
4449
} else {
4450
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4451
vcpu->arch.guestdbg.last_bp = 0;
4452
}
4453
4454
if (rc) {
4455
vcpu->guest_debug = 0;
4456
kvm_s390_clear_bp_data(vcpu);
4457
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4458
}
4459
4460
out:
4461
vcpu_put(vcpu);
4462
return rc;
4463
}
4464
4465
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4466
struct kvm_mp_state *mp_state)
4467
{
4468
int ret;
4469
4470
vcpu_load(vcpu);
4471
4472
/* CHECK_STOP and LOAD are not supported yet */
4473
ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4474
KVM_MP_STATE_OPERATING;
4475
4476
vcpu_put(vcpu);
4477
return ret;
4478
}
4479
4480
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4481
struct kvm_mp_state *mp_state)
4482
{
4483
int rc = 0;
4484
4485
vcpu_load(vcpu);
4486
4487
/* user space knows about this interface - let it control the state */
4488
kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4489
4490
switch (mp_state->mp_state) {
4491
case KVM_MP_STATE_STOPPED:
4492
rc = kvm_s390_vcpu_stop(vcpu);
4493
break;
4494
case KVM_MP_STATE_OPERATING:
4495
rc = kvm_s390_vcpu_start(vcpu);
4496
break;
4497
case KVM_MP_STATE_LOAD:
4498
if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4499
rc = -ENXIO;
4500
break;
4501
}
4502
rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4503
break;
4504
case KVM_MP_STATE_CHECK_STOP:
4505
fallthrough; /* CHECK_STOP and LOAD are not supported yet */
4506
default:
4507
rc = -ENXIO;
4508
}
4509
4510
vcpu_put(vcpu);
4511
return rc;
4512
}
4513
4514
static bool ibs_enabled(struct kvm_vcpu *vcpu)
4515
{
4516
return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4517
}
4518
4519
static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags)
4520
{
4521
struct kvm *kvm = gmap->private;
4522
gfn_t gfn = gpa_to_gfn(gaddr);
4523
bool unlocked;
4524
hva_t vmaddr;
4525
gpa_t tmp;
4526
int rc;
4527
4528
if (kvm_is_ucontrol(kvm)) {
4529
tmp = __gmap_translate(gmap, gaddr);
4530
gfn = gpa_to_gfn(tmp);
4531
}
4532
4533
vmaddr = gfn_to_hva(kvm, gfn);
4534
rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
4535
if (!rc)
4536
rc = __gmap_link(gmap, gaddr, vmaddr);
4537
return rc;
4538
}
4539
4540
/**
4541
* __kvm_s390_mprotect_many() - Apply specified protection to guest pages
4542
* @gmap: the gmap of the guest
4543
* @gpa: the starting guest address
4544
* @npages: how many pages to protect
4545
* @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
4546
* @bits: pgste notification bits to set
4547
*
4548
* Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one()
4549
*
4550
* Context: kvm->srcu and gmap->mm need to be held in read mode
4551
*/
4552
int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot,
4553
unsigned long bits)
4554
{
4555
unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
4556
gpa_t end = gpa + npages * PAGE_SIZE;
4557
int rc;
4558
4559
for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) {
4560
rc = gmap_protect_one(gmap, gpa, prot, bits);
4561
if (rc == -EAGAIN) {
4562
__kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag);
4563
rc = gmap_protect_one(gmap, gpa, prot, bits);
4564
}
4565
if (rc < 0)
4566
return rc;
4567
}
4568
4569
return 0;
4570
}
4571
4572
static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu)
4573
{
4574
gpa_t gaddr = kvm_s390_get_prefix(vcpu);
4575
int idx, rc;
4576
4577
idx = srcu_read_lock(&vcpu->kvm->srcu);
4578
mmap_read_lock(vcpu->arch.gmap->mm);
4579
4580
rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT);
4581
4582
mmap_read_unlock(vcpu->arch.gmap->mm);
4583
srcu_read_unlock(&vcpu->kvm->srcu, idx);
4584
4585
return rc;
4586
}
4587
4588
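/*
 * Process the requests posted against this vCPU before it re-enters
 * SIE: re-protect the prefix pages, flush the TLB, toggle IBS and
 * switch CMM interpretation off/on around live migration.
 */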
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4589
{
4590
retry:
4591
kvm_s390_vcpu_request_handled(vcpu);
4592
if (!kvm_request_pending(vcpu))
4593
return 0;
4594
/*
4595
* If the guest prefix changed, re-arm the ipte notifier for the
4596
* guest prefix page. kvm_s390_mprotect_notify_prefix() will wait on the ptl lock.
4597
* This ensures that the ipte instruction for this request has
4598
* already finished. We might race against a second unmapper that
4599
* wants to set the blocking bit. Let's just retry the request loop.
4600
*/
4601
if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4602
int rc;
4603
4604
rc = kvm_s390_mprotect_notify_prefix(vcpu);
4605
if (rc) {
4606
kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4607
return rc;
4608
}
4609
goto retry;
4610
}
4611
4612
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4613
vcpu->arch.sie_block->ihcpu = 0xffff;
4614
goto retry;
4615
}
4616
4617
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4618
if (!ibs_enabled(vcpu)) {
4619
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4620
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4621
}
4622
goto retry;
4623
}
4624
4625
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4626
if (ibs_enabled(vcpu)) {
4627
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4628
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4629
}
4630
goto retry;
4631
}
4632
4633
if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4634
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4635
goto retry;
4636
}
4637
4638
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4639
/*
4640
* Disable CMM virtualization; we will emulate the ESSA
4641
* instruction manually, in order to provide additional
4642
* functionalities needed for live migration.
4643
*/
4644
vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4645
goto retry;
4646
}
4647
4648
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4649
/*
4650
* Re-enable CMM virtualization if CMMA is available and
4651
* CMM has been used.
4652
*/
4653
if ((vcpu->kvm->arch.use_cmma) &&
4654
(vcpu->kvm->mm->context.uses_cmm))
4655
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4656
goto retry;
4657
}
4658
4659
/* we left the vsie handler, nothing to do, just clear the request */
4660
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4661
4662
return 0;
4663
}
4664
4665
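/*
 * Recompute the guest TOD epoch (and the epoch index if the
 * multiple-epoch facility is available) relative to the current host
 * TOD clock, then push the new values into all SIE blocks while every
 * vCPU is blocked.
 */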
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4666
{
4667
struct kvm_vcpu *vcpu;
4668
union tod_clock clk;
4669
unsigned long i;
4670
4671
preempt_disable();
4672
4673
store_tod_clock_ext(&clk);
4674
4675
kvm->arch.epoch = gtod->tod - clk.tod;
4676
kvm->arch.epdx = 0;
4677
if (test_kvm_facility(kvm, 139)) {
4678
kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4679
if (kvm->arch.epoch > gtod->tod)
4680
kvm->arch.epdx -= 1;
4681
}
4682
4683
kvm_s390_vcpu_block_all(kvm);
4684
kvm_for_each_vcpu(i, vcpu, kvm) {
4685
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4686
vcpu->arch.sie_block->epdx = kvm->arch.epdx;
4687
}
4688
4689
kvm_s390_vcpu_unblock_all(kvm);
4690
preempt_enable();
4691
}
4692
4693
int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4694
{
4695
if (!mutex_trylock(&kvm->lock))
4696
return 0;
4697
__kvm_s390_set_tod_clock(kvm, gtod);
4698
mutex_unlock(&kvm->lock);
4699
return 1;
4700
}
4701
4702
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4703
unsigned long token)
4704
{
4705
struct kvm_s390_interrupt inti;
4706
struct kvm_s390_irq irq;
4707
4708
if (start_token) {
4709
irq.u.ext.ext_params2 = token;
4710
irq.type = KVM_S390_INT_PFAULT_INIT;
4711
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4712
} else {
4713
inti.type = KVM_S390_INT_PFAULT_DONE;
4714
inti.parm64 = token;
4715
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4716
}
4717
}
4718
4719
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4720
struct kvm_async_pf *work)
4721
{
4722
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4723
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4724
4725
return true;
4726
}
4727
4728
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4729
struct kvm_async_pf *work)
4730
{
4731
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4732
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4733
}
4734
4735
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4736
struct kvm_async_pf *work)
4737
{
4738
/* s390 will always inject the page directly */
4739
}
4740
4741
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4742
{
4743
/*
4744
* s390 will always inject the page directly,
4745
* but we still want check_async_completion to clean up
4746
*/
4747
return true;
4748
}
4749
4750
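/*
 * Try to convert the current host fault into an asynchronous pfault:
 * only possible if the guest has pfault enabled, the PSW/CR0 state
 * allows the notification interrupts and no other interrupt is
 * already pending.
 */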
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4751
{
4752
hva_t hva;
4753
struct kvm_arch_async_pf arch;
4754
4755
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4756
return false;
4757
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4758
vcpu->arch.pfault_compare)
4759
return false;
4760
if (psw_extint_disabled(vcpu))
4761
return false;
4762
if (kvm_s390_vcpu_has_irq(vcpu, 0))
4763
return false;
4764
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4765
return false;
4766
if (!vcpu->arch.gmap->pfault_enabled)
4767
return false;
4768
4769
hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr);
4770
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4771
return false;
4772
4773
return kvm_setup_async_pf(vcpu, current->thread.gmap_teid.addr * PAGE_SIZE, hva, &arch);
4774
}
4775
4776
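/*
 * Per-iteration preparation before (re)entering SIE: complete finished
 * pfaults, deliver pending interrupts, handle posted requests and arm
 * the guest-debug PER shadowing if enabled.
 */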
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4777
{
4778
int rc, cpuflags;
4779
4780
/*
4781
* On s390 notifications for arriving pages will be delivered directly
4782
* to the guest but the housekeeping for completed pfaults is
4783
* handled outside the worker.
4784
*/
4785
kvm_check_async_pf_completion(vcpu);
4786
4787
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4788
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4789
4790
if (need_resched())
4791
schedule();
4792
4793
if (!kvm_is_ucontrol(vcpu->kvm)) {
4794
rc = kvm_s390_deliver_pending_interrupts(vcpu);
4795
if (rc || guestdbg_exit_pending(vcpu))
4796
return rc;
4797
}
4798
4799
rc = kvm_s390_handle_requests(vcpu);
4800
if (rc)
4801
return rc;
4802
4803
if (guestdbg_enabled(vcpu)) {
4804
kvm_s390_backup_guest_per_regs(vcpu);
4805
kvm_s390_patch_guest_per_regs(vcpu);
4806
}
4807
4808
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4809
4810
vcpu->arch.sie_block->icptcode = 0;
4811
current->thread.gmap_int_code = 0;
4812
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4813
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4814
trace_kvm_s390_sie_enter(vcpu, cpuflags);
4815
4816
return 0;
4817
}
4818
4819
static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu)
4820
{
4821
struct kvm_s390_pgm_info pgm_info = {
4822
.code = PGM_ADDRESSING,
4823
};
4824
u8 opcode, ilen;
4825
int rc;
4826
4827
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4828
trace_kvm_s390_sie_fault(vcpu);
4829
4830
/*
4831
* We want to inject an addressing exception, which is defined as a
4832
* suppressing or terminating exception. However, since we came here
4833
* by a DAT access exception, the PSW still points to the faulting
4834
* instruction since DAT exceptions are nullifying. So we've got
4835
* to look up the current opcode to get the length of the instruction
4836
* to be able to forward the PSW.
4837
*/
4838
rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4839
ilen = insn_length(opcode);
4840
if (rc < 0) {
4841
return rc;
4842
} else if (rc) {
4843
/* Instruction-Fetching Exceptions - we can't detect the ilen.
4844
* Forward by arbitrary ilc, injection will take care of
4845
* nullification if necessary.
4846
*/
4847
pgm_info = vcpu->arch.pgm;
4848
ilen = 4;
4849
}
4850
pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4851
kvm_s390_forward_psw(vcpu, ilen);
4852
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4853
}
4854
4855
static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
4856
{
4857
KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm,
4858
"Unexpected program interrupt 0x%x, TEID 0x%016lx",
4859
current->thread.gmap_int_code, current->thread.gmap_teid.val);
4860
}
4861
4862
/*
4863
* __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
4864
* @vcpu: the vCPU whose gmap is to be fixed up
4865
* @gfn: the guest frame number used for memslots (including fake memslots)
4866
* @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
4867
* @flags: FOLL_* flags
4868
*
4869
* Return: 0 on success, < 0 in case of error.
4870
* Context: The mm lock must not be held before calling. May sleep.
4871
*/
4872
int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
4873
{
4874
struct kvm_memory_slot *slot;
4875
unsigned int fault_flags;
4876
bool writable, unlocked;
4877
unsigned long vmaddr;
4878
struct page *page;
4879
kvm_pfn_t pfn;
4880
int rc;
4881
4882
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
4883
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
4884
return vcpu_post_run_addressing_exception(vcpu);
4885
4886
fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
4887
if (vcpu->arch.gmap->pfault_enabled)
4888
flags |= FOLL_NOWAIT;
4889
vmaddr = __gfn_to_hva_memslot(slot, gfn);
4890
4891
try_again:
4892
pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
4893
4894
/* Access outside memory, inject addressing exception */
4895
if (is_noslot_pfn(pfn))
4896
return vcpu_post_run_addressing_exception(vcpu);
4897
/* Signal pending: try again */
4898
if (pfn == KVM_PFN_ERR_SIGPENDING)
4899
return -EAGAIN;
4900
4901
/* Needs I/O, try to set up an async pfault (only possible with FOLL_NOWAIT) */
4902
if (pfn == KVM_PFN_ERR_NEEDS_IO) {
4903
trace_kvm_s390_major_guest_pfault(vcpu);
4904
if (kvm_arch_setup_async_pf(vcpu))
4905
return 0;
4906
vcpu->stat.pfault_sync++;
4907
/* Could not set up an async pfault, try again synchronously */
4908
flags &= ~FOLL_NOWAIT;
4909
goto try_again;
4910
}
4911
/* Any other error */
4912
if (is_error_pfn(pfn))
4913
return -EFAULT;
4914
4915
/* Success */
4916
mmap_read_lock(vcpu->arch.gmap->mm);
4917
/* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
4918
rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked);
4919
if (!rc)
4920
rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
4921
scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
4922
kvm_release_faultin_page(vcpu->kvm, page, false, writable);
4923
}
4924
mmap_read_unlock(vcpu->arch.gmap->mm);
4925
return rc;
4926
}
4927
4928
static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
4929
{
4930
unsigned long gaddr_tmp;
4931
gfn_t gfn;
4932
4933
gfn = gpa_to_gfn(gaddr);
4934
if (kvm_is_ucontrol(vcpu->kvm)) {
4935
/*
4936
* This translates the per-vCPU guest address into a
4937
* fake guest address, which can then be used with the
4938
* fake memslots that are identity mapping userspace.
4939
* This allows ucontrol VMs to use the normal fault
4940
* resolution path, like normal VMs.
4941
*/
4942
mmap_read_lock(vcpu->arch.gmap->mm);
4943
gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
4944
mmap_read_unlock(vcpu->arch.gmap->mm);
4945
if (gaddr_tmp == -EFAULT) {
4946
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4947
vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
4948
vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION;
4949
return -EREMOTE;
4950
}
4951
gfn = gpa_to_gfn(gaddr_tmp);
4952
}
4953
return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
4954
}
4955
4956
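/*
 * Dispatch on the program interruption code left behind by SIE:
 * secure-storage faults are resolved via the ultravisor import/destroy
 * helpers, ordinary DAT faults go through the common fault resolution
 * path, anything unexpected is a bug.
 */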
static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
4957
{
4958
unsigned int flags = 0;
4959
unsigned long gaddr;
4960
int rc;
4961
4962
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
4963
if (kvm_s390_cur_gmap_fault_is_write())
4964
flags = FAULT_FLAG_WRITE;
4965
4966
switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
4967
case 0:
4968
vcpu->stat.exit_null++;
4969
break;
4970
case PGM_SECURE_STORAGE_ACCESS:
4971
case PGM_SECURE_STORAGE_VIOLATION:
4972
kvm_s390_assert_primary_as(vcpu);
4973
/*
4974
* This can happen after a reboot with asynchronous teardown;
4975
* the new guest (normal or protected) will run on top of the
4976
* previous protected guest. The old pages need to be destroyed
4977
* so the new guest can use them.
4978
*/
4979
if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
4980
/*
4981
* Either KVM messed up the secure guest mapping or the
4982
* same page is mapped into multiple secure guests.
4983
*
4984
* This exception is only triggered when a guest 2 is
4985
* running and can therefore never occur in kernel
4986
* context.
4987
*/
4988
pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
4989
current->thread.gmap_int_code, current->comm,
4990
current->pid);
4991
send_sig(SIGSEGV, current, 0);
4992
}
4993
break;
4994
case PGM_NON_SECURE_STORAGE_ACCESS:
4995
kvm_s390_assert_primary_as(vcpu);
4996
/*
4997
* This is normal operation; a page belonging to a protected
4998
* guest has not been imported yet. Try to import the page into
4999
* the protected guest.
5000
*/
5001
rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
5002
if (rc == -EINVAL)
5003
send_sig(SIGSEGV, current, 0);
5004
if (rc != -ENXIO)
5005
break;
5006
flags = FAULT_FLAG_WRITE;
5007
fallthrough;
5008
case PGM_PROTECTION:
5009
case PGM_SEGMENT_TRANSLATION:
5010
case PGM_PAGE_TRANSLATION:
5011
case PGM_ASCE_TYPE:
5012
case PGM_REGION_FIRST_TRANS:
5013
case PGM_REGION_SECOND_TRANS:
5014
case PGM_REGION_THIRD_TRANS:
5015
kvm_s390_assert_primary_as(vcpu);
5016
return vcpu_dat_fault_handler(vcpu, gaddr, flags);
5017
default:
5018
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
5019
current->thread.gmap_int_code, current->thread.gmap_teid.val);
5020
send_sig(SIGSEGV, current, 0);
5021
break;
5022
}
5023
return 0;
5024
}
5025
5026
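/*
 * Post-process a SIE exit: reinject machine checks signalled by
 * -EINTR, hand non-zero intercept codes to the intercept handlers and
 * fall back to host fault handling otherwise.
 */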
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
5027
{
5028
struct mcck_volatile_info *mcck_info;
5029
struct sie_page *sie_page;
5030
int rc;
5031
5032
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
5033
vcpu->arch.sie_block->icptcode);
5034
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
5035
5036
if (guestdbg_enabled(vcpu))
5037
kvm_s390_restore_guest_per_regs(vcpu);
5038
5039
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
5040
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
5041
5042
if (exit_reason == -EINTR) {
5043
VCPU_EVENT(vcpu, 3, "%s", "machine check");
5044
sie_page = container_of(vcpu->arch.sie_block,
5045
struct sie_page, sie_block);
5046
mcck_info = &sie_page->mcck_info;
5047
kvm_s390_reinject_machine_check(vcpu, mcck_info);
5048
return 0;
5049
}
5050
5051
if (vcpu->arch.sie_block->icptcode > 0) {
5052
rc = kvm_handle_sie_intercept(vcpu);
5053
5054
if (rc != -EOPNOTSUPP)
5055
return rc;
5056
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
5057
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
5058
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
5059
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
5060
return -EREMOTE;
5061
}
5062
5063
return vcpu_post_run_handle_fault(vcpu);
5064
}
5065
5066
int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
5067
u64 *gprs, unsigned long gasce)
5068
{
5069
int ret;
5070
5071
guest_state_enter_irqoff();
5072
5073
/*
5074
* The guest_state_{enter,exit}_irqoff() functions inform lockdep and
5075
* tracing that entry to the guest will enable host IRQs, and exit from
5076
* the guest will disable host IRQs.
5077
*
5078
* We must not use lockdep/tracing/RCU in this critical section, so we
5079
* use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
5080
*/
5081
arch_local_irq_enable();
5082
ret = sie64a(scb, gprs, gasce);
5083
arch_local_irq_disable();
5084
5085
guest_state_exit_irqoff();
5086
5087
return ret;
5088
}
5089
5090
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
5091
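/*
 * The inner run loop: prepare the vCPU, enter SIE with guest timing
 * accounting, and post-process each exit until an error, a pending
 * signal or a guest-debug exit ends the loop.
 */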
static int __vcpu_run(struct kvm_vcpu *vcpu)
5092
{
5093
int rc, exit_reason;
5094
struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
5095
5096
/*
5097
* We try to hold kvm->srcu during most of vcpu_run (except when run-
5098
* ning the guest), so that memslots (and other stuff) are protected
5099
*/
5100
kvm_vcpu_srcu_read_lock(vcpu);
5101
5102
do {
5103
rc = vcpu_pre_run(vcpu);
5104
if (rc || guestdbg_exit_pending(vcpu))
5105
break;
5106
5107
kvm_vcpu_srcu_read_unlock(vcpu);
5108
/*
5109
* As PF_VCPU will be used in the fault handler, there must be no
5110
* uaccess between guest_timing_enter_irqoff and
5111
* guest_timing_exit_irqoff.
5112
*/
5113
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5114
memcpy(sie_page->pv_grregs,
5115
vcpu->run->s.regs.gprs,
5116
sizeof(sie_page->pv_grregs));
5117
}
5118
5119
local_irq_disable();
5120
guest_timing_enter_irqoff();
5121
__disable_cpu_timer_accounting(vcpu);
5122
5123
exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
5124
vcpu->run->s.regs.gprs,
5125
vcpu->arch.gmap->asce);
5126
5127
__enable_cpu_timer_accounting(vcpu);
5128
guest_timing_exit_irqoff();
5129
local_irq_enable();
5130
5131
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5132
memcpy(vcpu->run->s.regs.gprs,
5133
sie_page->pv_grregs,
5134
sizeof(sie_page->pv_grregs));
5135
/*
5136
* We're not allowed to inject interrupts on intercepts
5137
* that leave the guest state in an "in-between" state
5138
* where the next SIE entry will do a continuation.
5139
* Fence interrupts in our "internal" PSW.
5140
*/
5141
if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
5142
vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
5143
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5144
}
5145
}
5146
kvm_vcpu_srcu_read_lock(vcpu);
5147
5148
rc = vcpu_post_run(vcpu, exit_reason);
5149
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
5150
5151
kvm_vcpu_srcu_read_unlock(vcpu);
5152
return rc;
5153
}
5154
5155
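/*
 * Copy format-2 (non-protected) state from kvm_run into the SIE block,
 * lazily enabling runtime instrumentation and guarded storage when
 * userspace hands over valid control blocks.
 */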
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
5156
{
5157
struct kvm_run *kvm_run = vcpu->run;
5158
struct runtime_instr_cb *riccb;
5159
struct gs_cb *gscb;
5160
5161
riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
5162
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
5163
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
5164
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
5165
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5166
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
5167
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
5168
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
5169
}
5170
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
5171
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
5172
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
5173
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
5174
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
5175
kvm_clear_async_pf_completion_queue(vcpu);
5176
}
5177
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
5178
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
5179
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
5180
VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
5181
}
5182
/*
5183
* If userspace sets the riccb (e.g. after migration) to a valid state,
5184
* we should enable RI here instead of doing the lazy enablement.
5185
*/
5186
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
5187
test_kvm_facility(vcpu->kvm, 64) &&
5188
riccb->v &&
5189
!(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
5190
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
5191
vcpu->arch.sie_block->ecb3 |= ECB3_RI;
5192
}
5193
/*
5194
* If userspace sets the gscb (e.g. after migration) to non-zero,
5195
* we should enable GS here instead of doing the lazy enablement.
5196
*/
5197
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
5198
test_kvm_facility(vcpu->kvm, 133) &&
5199
gscb->gssm &&
5200
!vcpu->arch.gs_enabled) {
5201
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
5202
vcpu->arch.sie_block->ecb |= ECB_GS;
5203
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
5204
vcpu->arch.gs_enabled = 1;
5205
}
5206
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
5207
test_kvm_facility(vcpu->kvm, 82)) {
5208
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
5209
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
5210
}
5211
if (cpu_has_gs()) {
5212
preempt_disable();
5213
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5214
if (current->thread.gs_cb) {
5215
vcpu->arch.host_gscb = current->thread.gs_cb;
5216
save_gs_cb(vcpu->arch.host_gscb);
5217
}
5218
if (vcpu->arch.gs_enabled) {
5219
current->thread.gs_cb = (struct gs_cb *)
5220
&vcpu->run->s.regs.gscb;
5221
restore_gs_cb(current->thread.gs_cb);
5222
}
5223
preempt_enable();
5224
}
5225
/* SIE will load etoken directly from SDNX and therefore kvm_run */
5226
}
5227
5228
static void sync_regs(struct kvm_vcpu *vcpu)
5229
{
5230
struct kvm_run *kvm_run = vcpu->run;
5231
5232
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
5233
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
5234
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
5235
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
5236
/* some control register changes require a tlb flush */
5237
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5238
}
5239
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
5240
kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
5241
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
5242
}
5243
save_access_regs(vcpu->arch.host_acrs);
5244
restore_access_regs(vcpu->run->s.regs.acrs);
5245
vcpu->arch.acrs_loaded = true;
5246
kvm_s390_fpu_load(vcpu->run);
5247
/* Sync fmt2 only data */
5248
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
5249
sync_regs_fmt2(vcpu);
5250
} else {
5251
/*
5252
* In several places we have to modify our internal view to
5253
* not do things that are disallowed by the ultravisor. For
5254
* example we must not inject interrupts after specific exits
5255
* (e.g. 112 prefix page not secure). We do this by turning
5256
* off the machine check, external and I/O interrupt bits
5257
* of our PSW copy. To avoid getting validity intercepts, we
5258
* only accept the condition code from userspace.
5259
*/
5260
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
5261
vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
5262
PSW_MASK_CC;
5263
}
5264
5265
kvm_run->kvm_dirty_regs = 0;
5266
}
5267
5268
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
5269
{
5270
struct kvm_run *kvm_run = vcpu->run;
5271
5272
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
5273
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
5274
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
5275
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
5276
kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
5277
if (cpu_has_gs()) {
5278
preempt_disable();
5279
local_ctl_set_bit(2, CR2_GUARDED_STORAGE_BIT);
5280
if (vcpu->arch.gs_enabled)
5281
save_gs_cb(current->thread.gs_cb);
5282
current->thread.gs_cb = vcpu->arch.host_gscb;
5283
restore_gs_cb(vcpu->arch.host_gscb);
5284
if (!vcpu->arch.host_gscb)
5285
local_ctl_clear_bit(2, CR2_GUARDED_STORAGE_BIT);
5286
vcpu->arch.host_gscb = NULL;
5287
preempt_enable();
5288
}
5289
/* SIE will save etoken directly into SDNX and therefore kvm_run */
5290
}
5291
5292
static void store_regs(struct kvm_vcpu *vcpu)
5293
{
5294
struct kvm_run *kvm_run = vcpu->run;
5295
5296
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
5297
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
5298
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
5299
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
5300
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
5301
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
5302
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
5303
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
5304
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
5305
save_access_regs(vcpu->run->s.regs.acrs);
5306
restore_access_regs(vcpu->arch.host_acrs);
5307
vcpu->arch.acrs_loaded = false;
5308
kvm_s390_fpu_store(vcpu->run);
5309
if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
5310
store_regs_fmt2(vcpu);
5311
}
5312
5313
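/*
 * Back end of the KVM_RUN ioctl: synchronize the register state from
 * kvm_run, run the vCPU until userspace interaction is required, then
 * store the state back into kvm_run.
 */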
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
5314
{
5315
struct kvm_run *kvm_run = vcpu->run;
5316
DECLARE_KERNEL_FPU_ONSTACK32(fpu);
5317
int rc;
5318
5319
/*
5320
* Running a VM while dumping always has the potential to
5321
* produce inconsistent dump data. But for PV vcpus a SIE
5322
* entry while dumping could also lead to a fatal validity
5323
* intercept which we absolutely want to avoid.
5324
*/
5325
if (vcpu->kvm->arch.pv.dumping)
5326
return -EINVAL;
5327
5328
if (!vcpu->wants_to_run)
5329
return -EINTR;
5330
5331
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
5332
kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
5333
return -EINVAL;
5334
5335
vcpu_load(vcpu);
5336
5337
if (guestdbg_exit_pending(vcpu)) {
5338
kvm_s390_prepare_debug_exit(vcpu);
5339
rc = 0;
5340
goto out;
5341
}
5342
5343
kvm_sigset_activate(vcpu);
5344
5345
/*
5346
* no need to check the return value of vcpu_start as it can only have
5347
* an error for protvirt, but protvirt implies user-controlled cpu state
5348
*/
5349
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
5350
kvm_s390_vcpu_start(vcpu);
5351
} else if (is_vcpu_stopped(vcpu)) {
5352
pr_err_ratelimited("can't run stopped vcpu %d\n",
5353
vcpu->vcpu_id);
5354
rc = -EINVAL;
5355
goto out;
5356
}
5357
5358
kernel_fpu_begin(&fpu, KERNEL_FPC | KERNEL_VXR);
5359
sync_regs(vcpu);
5360
enable_cpu_timer_accounting(vcpu);
5361
5362
might_fault();
5363
rc = __vcpu_run(vcpu);
5364
5365
if (signal_pending(current) && !rc) {
5366
kvm_run->exit_reason = KVM_EXIT_INTR;
5367
rc = -EINTR;
5368
}
5369
5370
if (guestdbg_exit_pending(vcpu) && !rc) {
5371
kvm_s390_prepare_debug_exit(vcpu);
5372
rc = 0;
5373
}
5374
5375
if (rc == -EREMOTE) {
5376
/* userspace support is needed, kvm_run has been prepared */
5377
rc = 0;
5378
}
5379
5380
disable_cpu_timer_accounting(vcpu);
5381
store_regs(vcpu);
5382
kernel_fpu_end(&fpu, KERNEL_FPC | KERNEL_VXR);
5383
5384
kvm_sigset_deactivate(vcpu);
5385
5386
vcpu->stat.exit_userspace++;
5387
out:
5388
vcpu_put(vcpu);
5389
return rc;
5390
}
5391
5392
/*
5393
* store status at address
5394
* we have two special cases:
5395
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
5396
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
5397
*/
5398
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
5399
{
5400
unsigned char archmode = 1;
5401
freg_t fprs[NUM_FPRS];
5402
unsigned int px;
5403
u64 clkcomp, cputm;
5404
int rc;
5405
5406
px = kvm_s390_get_prefix(vcpu);
5407
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
5408
if (write_guest_abs(vcpu, 163, &archmode, 1))
5409
return -EFAULT;
5410
gpa = 0;
5411
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
5412
if (write_guest_real(vcpu, 163, &archmode, 1))
5413
return -EFAULT;
5414
gpa = px;
5415
} else
5416
gpa -= __LC_FPREGS_SAVE_AREA;
5417
5418
/* manually convert vector registers if necessary */
5419
if (cpu_has_vx()) {
5420
convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
5421
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5422
fprs, 128);
5423
} else {
5424
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
5425
vcpu->run->s.regs.fprs, 128);
5426
}
5427
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
5428
vcpu->run->s.regs.gprs, 128);
5429
rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
5430
&vcpu->arch.sie_block->gpsw, 16);
5431
rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
5432
&px, 4);
5433
rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
5434
&vcpu->run->s.regs.fpc, 4);
5435
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
5436
&vcpu->arch.sie_block->todpr, 4);
5437
cputm = kvm_s390_get_cpu_timer(vcpu);
5438
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
5439
&cputm, 8);
5440
clkcomp = vcpu->arch.sie_block->ckc >> 8;
5441
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
5442
&clkcomp, 8);
5443
rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
5444
&vcpu->run->s.regs.acrs, 64);
5445
rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
5446
&vcpu->arch.sie_block->gcr, 128);
5447
return rc ? -EFAULT : 0;
5448
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	kvm_s390_fpu_store(vcpu->run);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
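
/*
 * Illustrative userspace sketch (not built with this file): how the
 * KVM_S390_STORE_STATUS ioctl, dispatched further down in
 * kvm_arch_vcpu_ioctl(), might be invoked. The ioctl argument is the guest
 * absolute address of the save area, or one of the two special values
 * handled above; the helper name is made up and the constants are assumed
 * to come from <linux/kvm.h>.
 */
#if 0
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* store the register state of a (stopped) vcpu into its prefix area */
static int store_status_at_prefix(int vcpu_fd)
{
	return ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
		     KVM_S390_STORE_STATUS_PREFIXED);
}
#endif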

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

		if (!is_vcpu_stopped(tmp)) {
			started_vcpus++;
			started_vcpu = tmp;
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
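
/*
 * Illustrative userspace sketch (not built with this file): enabling
 * KVM_CAP_S390_CSS_SUPPORT on a vcpu fd via KVM_ENABLE_CAP, which ends up in
 * kvm_vcpu_ioctl_enable_cap() above. The flags and args fields must be zero;
 * the helper name is made up for the sketch.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int enable_css_support(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));	/* non-zero flags are rejected above */
	cap.cap = KVM_CAP_S390_CSS_SUPPORT;
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
#endif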

static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *sida_addr;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, sida_addr, mop->size))
			r = -EFAULT;

		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user(sida_addr, uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}

static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	enum gacc_mode acc_mode;
	void *tmpbuf = NULL;
	int r;

	r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
					KVM_S390_MEMOP_F_CHECK_ONLY |
					KVM_S390_MEMOP_F_SKEY_PROTECTION);
	if (r)
		return r;
	if (mop->ar >= NUM_ACRS)
		return -EINVAL;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
	if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
		r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
				    acc_mode, mop->key);
		goto out_inject;
	}
	if (acc_mode == GACC_FETCH) {
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r)
			goto out_inject;
		if (copy_to_user(uaddr, tmpbuf, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
	} else {
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			goto out_free;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
	}

out_inject:
	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

out_free:
	vfree(tmpbuf);
	return r;
}

static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
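
/*
 * Illustrative userspace sketch (not built with this file): a logical read
 * through the vcpu KVM_S390_MEM_OP ioctl, which is routed to
 * kvm_s390_vcpu_mem_op() above. The helper name is made up; it assumes the
 * uapi struct kvm_s390_mem_op from <linux/kvm.h> and uses access register 0
 * with no optional flags.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

/* read "len" bytes from guest logical address "gaddr" into "buf" */
static int memop_logical_read(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));
	op.op = KVM_S390_MEMOP_LOGICAL_READ;
	op.gaddr = gaddr;
	op.buf = (__u64)(unsigned long)buf;
	op.size = len;
	op.ar = 0;	/* access register 0, must be below NUM_ACRS */
	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}
#endif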

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int rc;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	default:
		rc = -ENOIOCTLCMD;
		break;
	}

	/*
	 * To simplify single stepping of userspace-emulated instructions,
	 * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
	 * should_handle_per_ifetch()). However, if userspace emulation injects
	 * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
	 * after (and not before) the interrupt delivery.
	 */
	if (!rc)
		vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;

	return rc;
}
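
/*
 * Illustrative userspace sketch (not built with this file): injecting a
 * restart interrupt through the KVM_S390_IRQ ioctl handled above. The
 * helper name is made up; KVM_S390_RESTART needs no payload, so only the
 * type field of the uapi struct kvm_s390_irq is set.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int inject_restart(int vcpu_fd)
{
	struct kvm_s390_irq irq;

	memset(&irq, 0, sizeof(irq));
	irq.type = KVM_S390_RESTART;	/* no payload in irq.u for this type */
	return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}
#endif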

static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
					struct kvm_pv_cmd *cmd)
{
	struct kvm_s390_pv_dmp dmp;
	void *data;
	int ret;

	/* Dump initialization is a prerequisite */
	if (!vcpu->kvm->arch.pv.dumping)
		return -EINVAL;

	if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
		return -EFAULT;

	/* We only handle this subcmd right now */
	if (dmp.subcmd != KVM_PV_DUMP_CPU)
		return -EINVAL;

	/* CPU dump length is the same as create cpu storage donation. */
	if (dmp.buff_len != uv_info.guest_cpu_stor_len)
		return -EINVAL;

	data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);

	VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
		   vcpu->vcpu_id, cmd->rc, cmd->rrc);

	if (ret)
		ret = -EINVAL;

	/* On success copy over the dump data */
	if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
		ret = -EFAULT;

	kvfree(data);
	return ret;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = vcpu_dat_fault_handler(vcpu, arg, 0);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_PV_CPU_COMMAND: {
		struct kvm_pv_cmd cmd;

		r = -EINVAL;
		if (!is_prot_virt_host())
			break;

		r = -EFAULT;
		if (copy_from_user(&cmd, argp, sizeof(cmd)))
			break;

		r = -EINVAL;
		if (cmd.flags)
			break;

		/* We only handle this cmd right now */
		if (cmd.cmd != KVM_PV_DUMP)
			break;

		r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);

		/* Always copy over UV rc / rrc data */
		if (copy_to_user((__u8 __user *)argp, &cmd.rc,
				 sizeof(cmd.rc) + sizeof(cmd.rrc)))
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
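
/*
 * Illustrative userspace sketch (not built with this file): saving a vcpu's
 * pending local interrupts with KVM_S390_GET_IRQ_STATE, as handled in the
 * ioctl switch above. The helper name is made up; the flags field is left
 * zero because, as noted above, non-zero flags would break old QEMUs. See
 * Documentation/virt/kvm/api.rst for the return convention.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int get_irq_state(int vcpu_fd, struct kvm_s390_irq *irqs, __u32 len)
{
	struct kvm_s390_irq_state irq_state;

	memset(&irq_state, 0, sizeof(irq_state));
	irq_state.buf = (__u64)(unsigned long)irqs;
	irq_state.len = len;	/* buffer size in bytes, must not be zero */
	return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}
#endif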

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
{
	return true;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	gpa_t size;

	if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
		/*
		 * A few sanity checks. Memory slots have to start and end on
		 * a segment boundary (1MB). The memory in userland may be
		 * fragmented across various different vmas. It is okay to
		 * mmap() and munmap() stuff in this slot at any time after
		 * doing this call.
		 */

		if (new->userspace_addr & 0xffffful)
			return -EINVAL;

		size = new->npages * PAGE_SIZE;
		if (size & 0xffffful)
			return -EINVAL;

		if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
			return -EINVAL;
	}

	if (!kvm->arch.migration_mode)
		return 0;

	/*
	 * Turn off migration mode when:
	 * - userspace creates a new memslot with dirty logging off,
	 * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
	 *   dirty logging is turned off.
	 * Migration mode expects dirty page logging being enabled to store
	 * its dirty bitmap.
	 */
	if (change != KVM_MR_DELETE &&
	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
		WARN(kvm_s390_vm_stop_migration(kvm),
		     "Failed to stop migration mode");

	return 0;
}
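
/*
 * Illustrative userspace sketch (not built with this file): registering a
 * memory slot that satisfies the alignment checks above, i.e. a host address
 * and a size that are both multiples of the 1 MB segment size. The helper
 * name and the slot layout are made up; the mapping is over-allocated by one
 * segment so the start address can be rounded up to a 1 MB boundary.
 */
#if 0
#include <string.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#define SEG_SIZE	(1UL << 20)	/* s390 segment size, required alignment */
#define SLOT_SIZE	(16UL << 20)	/* 16 MB, a multiple of SEG_SIZE */

static int add_main_memslot(int vm_fd)
{
	struct kvm_userspace_memory_region region;
	unsigned long addr;
	void *mem;

	mem = mmap(NULL, SLOT_SIZE + SEG_SIZE, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
	if (mem == MAP_FAILED)
		return -1;
	/* round up to the next segment boundary for userspace_addr */
	addr = ((unsigned long)mem + SEG_SIZE - 1) & ~(SEG_SIZE - 1);

	memset(&region, 0, sizeof(region));
	region.slot = 0;
	region.guest_phys_addr = 0;		/* must stay below the mem_limit */
	region.memory_size = SLOT_SIZE;		/* multiple of 1 MB */
	region.userspace_addr = addr;		/* 1 MB aligned host address */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
#endif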

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	if (kvm_is_ucontrol(kvm))
		return;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
				      new->base_gfn * PAGE_SIZE,
				      new->npages * PAGE_SIZE);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

static int __init kvm_s390_init(void)
{
	int i, r;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	r = __kvm_s390_init();
	if (r)
		return r;

	r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	if (r) {
		__kvm_s390_exit();
		return r;
	}
	return 0;
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();

	__kvm_s390_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");