#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include "vmx.h"
#include "vmx_msr.h"
#include "x86.h"
/*
 * Return true if the control bit 'bitpos' may be set to 1 according to the
 * VMX capability MSR value 'msr_val'.  The "allowed 1-settings" occupy the
 * high 32 bits of the capability MSR.
 */
static bool
vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
{

	return (((msr_val >> (bitpos + 32)) & 1) != 0);
}
/*
 * Return true if the control bit 'bitpos' may be set to 0 according to the
 * VMX capability MSR value 'msr_val'.  The "allowed 0-settings" occupy the
 * low 32 bits of the capability MSR (a clear bit means 0 is allowed).
 */
static bool
vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
{

	return (((msr_val >> bitpos) & 1) == 0);
}
uint32_t
vmx_revision(void)
{
return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
}
/*
 * Compute a value for a VMX control register that satisfies the
 * constraints advertised by the capability MSR 'ctl_reg' (and its "true"
 * counterpart 'true_ctl_reg' when bit 55 of MSR_VMX_BASIC indicates the
 * true capability MSRs exist).
 *
 * 'ones_mask' / 'zeros_mask' are the bits the caller requires to be 1 / 0.
 * On success the computed value is stored in '*retval' and 0 is returned;
 * EINVAL is returned if the masks overlap or conflict with what the
 * hardware allows.
 *
 * Fix vs. original: bit masks are built with '1u << i' instead of
 * '1 << i'.  With i == 31 the latter shifts a 1 into the sign bit of a
 * signed int, which is undefined behavior in C.
 */
int
vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
    uint32_t zeros_mask, uint32_t *retval)
{
	int i;
	uint64_t val, trueval;
	bool true_ctls_avail, one_allowed, zero_allowed;

	/* A bit cannot be required to be both 1 and 0. */
	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
		return (EINVAL);

	/* Bit 55 of MSR_VMX_BASIC advertises the "true" capability MSRs. */
	true_ctls_avail = (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) != 0;

	val = rdmsr(ctl_reg);
	if (true_ctls_avail)
		trueval = rdmsr(true_ctl_reg);
	else
		trueval = val;

	for (i = 0; i < 32; i++) {
		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);

		KASSERT(one_allowed || zero_allowed,
		    ("invalid zero/one setting for bit %d of ctl 0x%0x, "
		    "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));

		if (zero_allowed && !one_allowed) {
			/* Hardware forces this bit to 0. */
			if (ones_mask & (1u << i))
				return (EINVAL);
			*retval &= ~(1u << i);
		} else if (one_allowed && !zero_allowed) {
			/* Hardware forces this bit to 1. */
			if (zeros_mask & (1u << i))
				return (EINVAL);
			*retval |= 1u << i;
		} else {
			/*
			 * Both settings are allowed: honor the caller's
			 * masks first, then fall back to the default
			 * suggested by the non-true capability MSR.
			 */
			if (zeros_mask & (1u << i))
				*retval &= ~(1u << i);
			else if (ones_mask & (1u << i))
				*retval |= 1u << i;
			else if (!true_ctls_avail)
				*retval &= ~(1u << i);
			else if (vmx_ctl_allows_zero_setting(val, i))
				*retval &= ~(1u << i);
			else if (vmx_ctl_allows_one_setting(val, i))
				*retval |= 1u << i;
			else {
				panic("vmx_set_ctlreg: unable to determine "
				    "correct value of ctl bit %d for msr "
				    "0x%0x and true msr 0x%0x", i, ctl_reg,
				    true_ctl_reg);
			}
		}
	}

	return (0);
}
/*
 * Initialize a 4KB MSR permission bitmap page to all ones, which denies
 * the guest direct access to every MSR (each access then causes a VM
 * exit).  Individual MSRs are opened up via msr_bitmap_change_access().
 */
void
msr_bitmap_initialize(char *bitmap)
{
	memset(bitmap, 0xff, PAGE_SIZE);
}
/*
 * Update the MSR permission bitmap for 'msr'.  The low 2KB of the page
 * holds the read-permission bits (low MSR range first, then the
 * 0xC0000000 range) and the high 2KB holds the write-permission bits.
 * A clear bit grants the guest direct access; a set bit causes a VM exit.
 * Returns EINVAL for MSRs outside the two architecturally covered ranges.
 */
int
msr_bitmap_change_access(char *bitmap, u_int msr, int access)
{
	int byte_off, mask;

	if (msr <= 0x00001FFF)
		byte_off = msr / 8;
	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
		byte_off = 1024 + (msr - 0xC0000000) / 8;
	else
		return (EINVAL);

	mask = 1 << (msr & 0x7);

	/* Read permissions: low 2KB of the bitmap. */
	if (access & MSR_BITMAP_ACCESS_READ)
		bitmap[byte_off] &= ~mask;
	else
		bitmap[byte_off] |= mask;

	/* Write permissions: high 2KB of the bitmap. */
	if (access & MSR_BITMAP_ACCESS_WRITE)
		bitmap[byte_off + 2048] &= ~mask;
	else
		bitmap[byte_off + 2048] |= mask;

	return (0);
}
/* MSR_IA32_MISC_ENABLE value presented to guests (host value with fixups). */
static uint64_t misc_enable;
/* Emulated MSR_PLATFORM_INFO: ratio replicated into bits 15:8 and 47:40. */
static uint64_t platform_info;
/* Emulated MSR_TURBO_RATIO_LIMIT: same ratio replicated into all 8 bytes. */
static uint64_t turbo_ratio_limit;
/* Host values of the context-switched MSRs, restored on guest exit. */
static uint64_t host_msrs[GUEST_MSR_NUM];
/*
 * Identify Nehalem-class CPUs: family 6, models 0x1A, 0x1E, 0x1F or 0x2E.
 * These parts use the 133.33 MHz bus clock (see vmx_msr_init()).
 */
static bool
nehalem_cpu(void)
{
	u_int model;

	if (CPUID_TO_FAMILY(cpu_id) != 0x6)
		return (false);

	model = CPUID_TO_MODEL(cpu_id);
	return (model == 0x1A || model == 0x1E || model == 0x1F ||
	    model == 0x2E);
}
/*
 * Identify Westmere-class CPUs: family 6, models 0x25 or 0x2C.
 * These parts use the 133.33 MHz bus clock (see vmx_msr_init()).
 */
static bool
westmere_cpu(void)
{
	u_int model;

	if (CPUID_TO_FAMILY(cpu_id) != 0x6)
		return (false);

	model = CPUID_TO_MODEL(cpu_id);
	return (model == 0x25 || model == 0x2C);
}
/*
 * Return true if 'val' is a legal IA32_PAT value: each of its 8 byte-wide
 * entries must hold one of the defined memory types 0, 1, 4, 5, 6 or 7
 * (2, 3 and anything >= 8 are reserved encodings).
 */
static bool
pat_valid(uint64_t val)
{
	uint64_t entry;
	int shift;

	for (shift = 0; shift < 64; shift += 8) {
		entry = (val >> shift) & 0xff;
		if (entry == 2 || entry == 3 || entry >= 8)
			return (false);
	}
	return (true);
}
/*
 * One-time host-side MSR setup: snapshot the host's syscall-related MSRs
 * (restored at guest exit) and build the values used to emulate
 * MSR_IA32_MISC_ENABLE, MSR_PLATFORM_INFO and MSR_TURBO_RATIO_LIMIT.
 */
void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/* Cache the host MSRs that vmx_msr_guest_exit() reloads. */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Start from the host's MISC_ENABLE; force bits 11 and 12 on and
	 * clear bits 16 and 18 in the value exposed to guests.
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	/* Nehalem/Westmere use a 133.33 MHz bus clock, later parts 100 MHz. */
	bus_freq = (nehalem_cpu() || westmere_cpu()) ? 133330000 : 100000000;

	/*
	 * PLATFORM_INFO: put the TSC/bus ratio in bits 15:8 (maximum
	 * non-turbo ratio) and bits 47:40 (maximum efficiency ratio).
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;
	platform_info = (ratio << 8) | (ratio << 40);

	/* TURBO_RATIO_LIMIT: replicate the same ratio into all 8 bytes. */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}
/*
 * Per-vcpu guest MSR state initialization.  Pass-through access to the
 * syscall-related MSRs is configured only when vcpuid 0 is initialized
 * (presumably because the permission bitmap is shared by all vcpus --
 * see guest_msr_rw()).  Every vcpu gets the power-on default IA32_PAT.
 */
void
vmx_msr_guest_init(struct vmx *vmx, struct vmx_vcpu *vcpu)
{

	if (vcpu->vcpuid == 0) {
		/* Grant the guest direct read/write access to these MSRs. */
		guest_msr_rw(vmx, MSR_LSTAR);
		guest_msr_rw(vmx, MSR_CSTAR);
		guest_msr_rw(vmx, MSR_STAR);
		guest_msr_rw(vmx, MSR_SF_MASK);
		guest_msr_rw(vmx, MSR_KGSBASE);
	}

	/* Architectural reset value of IA32_PAT. */
	vcpu->guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
	    PAT_VALUE(1, PAT_WRITE_THROUGH) |
	    PAT_VALUE(2, PAT_UNCACHED) |
	    PAT_VALUE(3, PAT_UNCACHEABLE) |
	    PAT_VALUE(4, PAT_WRITE_BACK) |
	    PAT_VALUE(5, PAT_WRITE_THROUGH) |
	    PAT_VALUE(6, PAT_UNCACHED) |
	    PAT_VALUE(7, PAT_UNCACHEABLE);
}
/*
 * Called before entering guest context: install the guest's syscall and
 * kernel-GS-base MSRs on this CPU.  The host counterparts were cached in
 * host_msrs[] by vmx_msr_init() and are reloaded by vmx_msr_guest_exit().
 */
void
vmx_msr_guest_enter(struct vmx_vcpu *vcpu)
{
	/*
	 * NOTE(review): presumably refreshes the FS/GS base values saved in
	 * the current pcb before the MSRs below are clobbered -- confirm
	 * against update_pcb_bases().
	 */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, vcpu->guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, vcpu->guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, vcpu->guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, vcpu->guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, vcpu->guest_msrs[IDX_MSR_KGSBASE]);
}
/*
 * Switch MSR_TSC_AUX to the guest's value before guest entry.  The write
 * is skipped when the hardware/hypervisor does not virtualize TSC_AUX or
 * when the guest's value already matches the host's.
 */
void
vmx_msr_guest_enter_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_val = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_val = cpu_auxmsr();
	bool need_switch;

	need_switch = vmx_have_msr_tsc_aux && guest_val != host_val;
	if (need_switch)
		wrmsr(MSR_TSC_AUX, guest_val);
}
/*
 * Called after leaving guest context: save the guest's syscall MSRs
 * (which the guest may have modified while running) and restore the host
 * values cached by vmx_msr_init().
 */
void
vmx_msr_guest_exit(struct vmx_vcpu *vcpu)
{
	/* Save the guest's current MSR values. */
	vcpu->guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	vcpu->guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	vcpu->guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	vcpu->guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	vcpu->guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
	/*
	 * Restore the host MSRs.  MSR_KGSBASE is deliberately not reloaded
	 * here -- NOTE(review): presumably re-established by the normal
	 * pcb/context-switch path (cf. update_pcb_bases() on entry);
	 * confirm.
	 */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
}
/*
 * Restore the host's MSR_TSC_AUX after guest exit, but only when TSC_AUX
 * is virtualized and the guest was actually running with a different
 * value.  Nothing needs to be saved here: guest_msrs[IDX_MSR_TSC_AUX]
 * already holds the guest's current value, since vmx_wrmsr() updates it
 * on every guest write.
 */
void
vmx_msr_guest_exit_tsc_aux(struct vmx *vmx, struct vmx_vcpu *vcpu)
{
	uint64_t guest_val = vcpu->guest_msrs[IDX_MSR_TSC_AUX];
	uint32_t host_val = cpu_auxmsr();
	bool need_switch;

	need_switch = vmx_have_msr_tsc_aux && guest_val != host_val;
	if (need_switch)
		wrmsr(MSR_TSC_AUX, host_val);
}
/*
 * Emulate a guest RDMSR for the MSRs handled in-kernel by the VMX code.
 * On success '*val' is filled in and 0 is returned; EINVAL is returned
 * for MSRs not handled here (so the caller can punt elsewhere).  'retu'
 * is not used by this function.
 */
int
vmx_rdmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t *val, bool *retu)
{
	int error;

	error = 0;
	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		/* Machine-check capabilities are not exposed to the guest. */
		*val = 0;
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		/* MTRRs are emulated; an invalid MSR number injects #GP. */
		if (vm_rdmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		/* Value built by vmx_msr_init() from the host's MSR. */
		*val = misc_enable;
		break;
	case MSR_PLATFORM_INFO:
		*val = platform_info;
		break;
	case MSR_TURBO_RATIO_LIMIT:
	case MSR_TURBO_RATIO_LIMIT1:
		*val = turbo_ratio_limit;
		break;
	case MSR_PAT:
		/* Per-vcpu PAT, maintained by vmx_wrmsr(). */
		*val = vcpu->guest_msrs[IDX_MSR_PAT];
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
/*
 * Emulate a guest WRMSR for the MSRs handled in-kernel by the VMX code.
 * Returns 0 when the write was emulated (or deliberately ignored) and
 * EINVAL for MSRs/values that must be handled elsewhere.  Architecturally
 * illegal values inject #GP into the guest instead of failing.  'retu'
 * is not used by this function.
 */
int
vmx_wrmsr(struct vmx_vcpu *vcpu, u_int num, uint64_t val, bool *retu)
{
	uint64_t changed;
	int error;

	error = 0;
	switch (num) {
	case MSR_MCG_CAP:
	case MSR_MCG_STATUS:
		/* Writes to the machine-check MSRs are silently ignored. */
		break;
	case MSR_MTRRcap:
	case MSR_MTRRdefType:
	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
	case MSR_MTRR64kBase:
	case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
		/* MTRRs are emulated; an invalid write injects #GP. */
		if (vm_wrmtrr(&vcpu->mtrr, num, val) != 0) {
			vm_inject_gp(vcpu->vcpu);
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		changed = val ^ misc_enable;
		/*
		 * Tolerate a toggle of bit 34 (presumably the XD-bit
		 * disable, which some guests write -- confirm); any other
		 * modification is rejected with EINVAL.
		 * NOTE(review): the toggled bit is not folded back into
		 * 'misc_enable', so a subsequent RDMSR will not observe
		 * the write -- confirm this is intended.
		 */
		changed &= ~(1UL << 34);
		if (changed)
			error = EINVAL;
		break;
	case MSR_PAT:
		/* Accept only architecturally valid PAT encodings. */
		if (pat_valid(val))
			vcpu->guest_msrs[IDX_MSR_PAT] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	case MSR_TSC:
		/* A guest TSC write is applied as a TSC-offset change. */
		error = vmx_set_tsc_offset(vcpu, val - rdtsc());
		break;
	case MSR_TSC_AUX:
		/*
		 * Cache the value; vmx_msr_guest_enter_tsc_aux() loads it
		 * before every guest entry.
		 */
		if (vmx_have_msr_tsc_aux)
			vcpu->guest_msrs[IDX_MSR_TSC_AUX] = val;
		else
			vm_inject_gp(vcpu->vcpu);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}