Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/amd64/vmm/intel/vmx_msr.c
39536 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2011 NetApp, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/param.h>
30
#include <sys/systm.h>
31
#include <sys/proc.h>
32
33
#include <machine/clock.h>
34
#include <machine/cpufunc.h>
35
#include <machine/md_var.h>
36
#include <machine/pcb.h>
37
#include <machine/specialreg.h>
38
#include <machine/vmm.h>
39
40
#include "vmx.h"
41
#include "vmx_msr.h"
42
#include "x86.h"
43
44
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	/*
	 * The "allowed-1" settings of a VMX capability MSR live in its
	 * upper 32 bits: a set bit means the control may be set to 1.
	 */
	return (((msr_val >> 32) & (1UL << bitpos)) != 0);
}
50
51
static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	/*
	 * The "allowed-0" settings live in the low 32 bits of the MSR:
	 * a clear bit means the control may be set to 0.
	 */
	return (((msr_val >> bitpos) & 1UL) == 0);
}
57
58
uint32_t
59
vmx_revision(void)
60
{
61
62
return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
63
}
64
65
/*
66
* Generate a bitmask to be used for the VMCS execution control fields.
67
*
68
* The caller specifies what bits should be set to one in 'ones_mask'
69
* and what bits should be set to zero in 'zeros_mask'. The don't-care
70
* bits are set to the default value. The default values are obtained
71
* based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
72
* VMX Capabilities".
73
*
74
* Returns zero on success and non-zero on error.
75
*/
76
int
77
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
78
uint32_t zeros_mask, uint32_t *retval)
79
{
80
int i;
81
uint64_t val, trueval;
82
bool true_ctls_avail, one_allowed, zero_allowed;
83
84
/* We cannot ask the same bit to be set to both '1' and '0' */
85
if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
86
return (EINVAL);
87
88
true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;
89
90
val = rdmsr(ctl_reg);
91
if (true_ctls_avail)
92
trueval = rdmsr(true_ctl_reg); /* step c */
93
else
94
trueval = val; /* step a */
95
96
for (i = 0; i < 32; i++) {
97
one_allowed = vmx_ctl_allows_one_setting(trueval, i);
98
zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
99
100
KASSERT(one_allowed || zero_allowed,
101
("invalid zero/one setting for bit %d of ctl 0x%0x, "
102
"truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
103
104
if (zero_allowed && !one_allowed) { /* b(i),c(i) */
105
if (ones_mask & (1 << i))
106
return (EINVAL);
107
*retval &= ~(1 << i);
108
} else if (one_allowed && !zero_allowed) { /* b(i),c(i) */
109
if (zeros_mask & (1 << i))
110
return (EINVAL);
111
*retval |= 1 << i;
112
} else {
113
if (zeros_mask & (1 << i)) /* b(ii),c(ii) */
114
*retval &= ~(1 << i);
115
else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
116
*retval |= 1 << i;
117
else if (!true_ctls_avail)
118
*retval &= ~(1 << i); /* b(iii) */
119
else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
120
*retval &= ~(1 << i);
121
else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
122
*retval |= 1 << i;
123
else {
124
panic("vmx_set_ctlreg: unable to determine "
125
"correct value of ctl bit %d for msr "
126
"0x%0x and true msr 0x%0x", i, ctl_reg,
127
true_ctl_reg);
128
}
129
}
130
}
131
132
return (0);
133
}
134
135
/*
 * Fill the 4KB MSR permission bitmap with all ones.  Per the Intel SDM a
 * set bit causes accesses to the corresponding MSR to be intercepted, so
 * this starts every MSR as "trapped"; individual MSRs are opened up via
 * msr_bitmap_change_access().
 */
void
msr_bitmap_initialize(char *bitmap)
{

	memset(bitmap, 0xff, PAGE_SIZE);
}
141
142
int
143
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
144
{
145
int byte, bit;
146
147
if (msr <= 0x00001FFF)
148
byte = msr / 8;
149
else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
150
byte = 1024 + (msr - 0xC0000000) / 8;
151
else
152
return (EINVAL);
153
154
bit = msr & 0x7;
155
156
if (access & MSR_BITMAP_ACCESS_READ)
157
bitmap[byte] &= ~(1 << bit);
158
else
159
bitmap[byte] |= 1 << bit;
160
161
byte += 2048;
162
if (access & MSR_BITMAP_ACCESS_WRITE)
163
bitmap[byte] &= ~(1 << bit);
164
else
165
bitmap[byte] |= 1 << bit;
166
167
return (0);
168
}
169
170
/* Emulated IA32_MISC_ENABLE value, seeded from the host in vmx_msr_init(). */
static uint64_t misc_enable;
/* Emulated MSR_PLATFORM_INFO value, synthesized in vmx_msr_init(). */
static uint64_t platform_info;
/* Emulated MSR_TURBO_RATIO_LIMIT/LIMIT1 value, synthesized in vmx_msr_init(). */
static uint64_t turbo_ratio_limit;
/* Host MSR values cached once in vmx_msr_init() and restored after VM exit. */
static uint64_t host_msrs[GUEST_MSR_NUM];
174
175
static bool
176
nehalem_cpu(void)
177
{
178
u_int family, model;
179
180
/*
181
* The family:model numbers belonging to the Nehalem microarchitecture
182
* are documented in Section 35.5, Intel SDM dated Feb 2014.
183
*/
184
family = CPUID_TO_FAMILY(cpu_id);
185
model = CPUID_TO_MODEL(cpu_id);
186
if (family == 0x6) {
187
switch (model) {
188
case 0x1A:
189
case 0x1E:
190
case 0x1F:
191
case 0x2E:
192
return (true);
193
default:
194
break;
195
}
196
}
197
return (false);
198
}
199
200
static bool
201
westmere_cpu(void)
202
{
203
u_int family, model;
204
205
/*
206
* The family:model numbers belonging to the Westmere microarchitecture
207
* are documented in Section 35.6, Intel SDM dated Feb 2014.
208
*/
209
family = CPUID_TO_FAMILY(cpu_id);
210
model = CPUID_TO_MODEL(cpu_id);
211
if (family == 0x6) {
212
switch (model) {
213
case 0x25:
214
case 0x2C:
215
return (true);
216
default:
217
break;
218
}
219
}
220
return (false);
221
}
222
223
static bool
pat_valid(uint64_t val)
{
	int shift, type;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Walk the eight PAT entries (PA0 through PA7, one per byte) and
	 * reject any reserved encoding: types 2, 3 and anything above 7.
	 */
	for (shift = 0; shift < 64; shift += 8) {
		type = (val >> shift) & 0xff;
		if (type == 2 || type == 3 || type >= 8)
			return (false);
	}
	return (true);
}
241
242
/*
 * One-time, host-wide initialization: cache the host's syscall-related MSRs
 * in host_msrs[] and synthesize the values served back to guests for the
 * emulated MSRs (IA32_MISC_ENABLE, MSR_PLATFORM_INFO, MSR_TURBO_RATIO_LIMIT*).
 */
void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/*
	 * It is safe to cache the values of the following MSRs because
	 * they don't change based on curcpu, curproc or curthread.
	 */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Initialize emulated MSRs, starting from the host's
	 * IA32_MISC_ENABLE value.
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	/*
	 * Set mandatory bits
	 *  11:   branch trace disabled
	 *  12:   PEBS unavailable
	 * Clear unsupported features
	 *  16:   SpeedStep enable
	 *  18:   enable MONITOR FSM
	 */
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	/* Nehalem/Westmere use a 133MHz bus clock; later parts use 100MHz. */
	if (nehalem_cpu() || westmere_cpu())
		bus_freq = 133330000;		/* 133Mhz */
	else
		bus_freq = 100000000;		/* 100Mhz */

	/*
	 * XXXtime
	 * The ratio should really be based on the virtual TSC frequency as
	 * opposed to the host TSC.
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;

	/*
	 * The register definition is based on the micro-architecture
	 * but the following bits are always the same:
	 * [15:8]  Maximum Non-Turbo Ratio
	 * [28]    Programmable Ratio Limit for Turbo Mode
	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
	 * [47:40] Maximum Efficiency Ratio
	 *
	 * The other bits can be safely set to 0 on all
	 * micro-architectures up to Haswell.
	 */
	platform_info = (ratio << 8) | (ratio << 40);

	/*
	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
	 * dependent on the maximum cores per package supported by the micro-
	 * architecture. For e.g., Westmere supports 6 cores per package and
	 * uses the low 48 bits. Sandybridge support 8 cores per package and
	 * uses up all 64 bits.
	 *
	 * However, the unused bits are reserved so we pretend that all bits
	 * in this MSR are valid.
	 */
	/* Replicate the non-turbo ratio into all eight per-core-count bytes. */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}
310
311
void
312
vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
313
{
314
/*
315
* The permissions bitmap is shared between all vcpus so initialize it
316
* once when initializing the vBSP.
317
*/
318
if (vcpu->vcpuid == 0) {
319
guest_msr_rw(vmx, MSR_LSTAR);
320
guest_msr_rw(vmx, MSR_CSTAR);
321
guest_msr_rw(vmx, MSR_STAR);
322
guest_msr_rw(vmx, MSR_SF_MASK);
323
guest_msr_rw(vmx, MSR_KGSBASE);
324
}
325
326
/*
327
* Initialize guest IA32_PAT MSR with default value after reset.
328
*/
329
vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
330
PAT_VALUE(1, PAT_WRITE_THROUGH) |
331
PAT_VALUE(2, PAT_UNCACHED) |
332
PAT_VALUE(3, PAT_UNCACHEABLE) |
333
PAT_VALUE(4, PAT_WRITE_BACK) |
334
PAT_VALUE(5, PAT_WRITE_THROUGH) |
335
PAT_VALUE(6, PAT_UNCACHED) |
336
PAT_VALUE(7, PAT_UNCACHEABLE);
337
338
return;
339
}
340
341
/*
 * Load the guest's syscall-related MSRs just before entering the guest.
 * The host copies are saved first (update_pcb_bases) so they can be
 * restored later by vmx_msr_guest_exit() or on return to userspace.
 */
void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{

	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}
353
354
void
355
vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
356
{
357
uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
358
uint32_t host_aux = cpu_auxmsr();
359
360
if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
361
wrmsr(MSR_TSC_AUX, guest_tsc_aux);
362
}
363
364
/*
 * Snapshot the guest's syscall-related MSRs after running the guest and
 * restore the host values cached by vmx_msr_init().  The save must happen
 * before the restore since both touch the same hardware registers.
 */
void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{

	/* Save guest MSRs */
	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

	/* Restore host MSRs */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

	/* MSR_KGSBASE will be restored on the way back to userspace */
}
383
384
void
385
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
386
{
387
uint64_t guest_tsc_aux = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
388
uint32_t host_aux = cpu_auxmsr();
389
390
if (vmx_have_msr_tsc_aux && guest_tsc_aux != host_aux)
391
/*
392
* Note that it is not necessary to save the guest value
393
* here; vcpu->guest_msrs[IDX_MSR_TSC_AUX] always
394
* contains the current value since it is updated whenever
395
* the guest writes to it (which is expected to be very
396
* rare).
397
*/
398
wrmsr(MSR_TSC_AUX, host_aux);
399
}
400
401
/*
 * Emulate a guest RDMSR of 'num'.
 *
 * On success *val receives the emulated value and 0 is returned; EINVAL is
 * returned for MSRs this layer does not emulate.  'retu' is part of the
 * vmm rdmsr interface but is not written here.  The case list mirrors
 * vmx_wrmsr() below.
 */
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		/* No machine-check banks or state are exposed to the guest. */
		*val = 0;
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		/* Per-vcpu MTRR emulation; a failed read injects #GP. */
		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		/* Host-wide value massaged in vmx_msr_init(). */
		*val = misc_enable;
		break;
	case MSR_PLATFORM_INFO:
		*val = platform_info;
		break;
	case MSR_TURBO_RATIO_LIMIT:
	case MSR_TURBO_RATIO_LIMIT1:
		*val = turbo_ratio_limit;
		break;
	case MSR_PAT:
		/* Per-vcpu value, seeded in vmx_msr_guest_init(). */
		*val = vcpu->guest_msrs[IDX_MSR_PAT];
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
442
443
/*
 * Emulate a guest WRMSR of 'val' to MSR 'num'.
 *
 * Returns 0 when the write was handled (including cases where a #GP was
 * injected into the guest) and EINVAL when the MSR is unknown or the write
 * must be punted.  'retu' is part of the vmm wrmsr interface but is not
 * written here.  The case list mirrors vmx_rdmsr() above.
 */
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
	uint64_t changed;
	int error;

	error = 0;

	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		break;		/* ignore writes */
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		/* Per-vcpu MTRR emulation; an invalid write injects #GP. */
		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		changed = val ^ misc_enable;
		/*
		 * If the host has disabled the NX feature then the guest
		 * also cannot use it. However, a Linux guest will try to
		 * enable the NX feature by writing to the MISC_ENABLE MSR.
		 *
		 * This can be safely ignored because the memory management
		 * code looks at CPUID.80000001H:EDX.NX to check if the
		 * functionality is actually enabled.
		 */
		changed &= ~(1UL << 34);

		/*
		 * Punt to userspace if any other bits are being modified.
		 */
		if (changed)
			error = EINVAL;

		break;
	case MSR_PAT:
		/* Reject reserved memory-type encodings with a #GP. */
		if (pat_valid(val))
			vcpu->guest_msrs[IDX_MSR_PAT] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	case MSR_TSC:
		/* A guest TSC write becomes an adjustment of its TSC offset. */
		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
		break;
	case MSR_TSC_AUX:
		if (vmx_have_msr_tsc_aux)
			/*
			 * vmx_msr_guest_enter_tsc_aux() will apply this
			 * value when it is called immediately before guest
			 * entry.
			 */
			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
512
513