Path: blob/main/usr.sbin/bhyve/amd64/bhyverun_machdep.c
109543 views
/*-1* SPDX-License-Identifier: BSD-2-Clause2*3* Copyright (c) 2011 NetApp, Inc.4* All rights reserved.5*6* Redistribution and use in source and binary forms, with or without7* modification, are permitted provided that the following conditions8* are met:9* 1. Redistributions of source code must retain the above copyright10* notice, this list of conditions and the following disclaimer.11* 2. Redistributions in binary form must reproduce the above copyright12* notice, this list of conditions and the following disclaimer in the13* documentation and/or other materials provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND16* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE17* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE18* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE19* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL20* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS21* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)22* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT23* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY24* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF25* SUCH DAMAGE.26*/2728#include <assert.h>29#include <err.h>30#include <stdbool.h>31#include <stdlib.h>32#include <sysexits.h>3334#include <vmmapi.h>3536#include "acpi.h"37#include "atkbdc.h"38#include "bhyverun.h"39#include "bootrom.h"40#include "config.h"41#include "debug.h"42#include "e820.h"43#include "fwctl.h"44#include "ioapic.h"45#include "inout.h"46#include "kernemu_dev.h"47#include "mptbl.h"48#include "pci_emul.h"49#include "pci_irq.h"50#include "pci_lpc.h"51#include "rtc.h"52#include "smbiostbl.h"53#include "xmsr.h"5455void56bhyve_init_config(void)57{58init_config();5960/* Set default values prior to option parsing. */61set_config_bool("acpi_tables", true);62set_config_bool("acpi_tables_in_memory", true);63set_config_value("memory.size", "256M");64set_config_bool("x86.strictmsr", true);65set_config_bool("x86.verbosemsr", false);66set_config_value("lpc.fwcfg", "bhyve");67}6869void70bhyve_usage(int code)71{72const char *progname;7374progname = getprogname();7576fprintf(stderr,77"Usage: %s [-aCDeHhPSuWwxY]\n"78" %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"79" %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n"80" %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n"81" -a: local apic is in xAPIC mode (deprecated)\n"82" -C: include guest memory in core file\n"83" -c: number of CPUs and/or topology specification\n"84" -D: destroy on power-off\n"85" -e: exit on unhandled I/O access\n"86" -G: start a debug server\n"87" -H: vmexit from the guest on HLT\n"88" -h: help\n"89" -k: key=value flat config file\n"90" -K: PS2 keyboard layout\n"91" -l: LPC device configuration\n"92" -M: monitor mode\n"93" -m: memory size\n"94" -n: NUMA domain specification\n"95" -o: set config 'var' to 'value'\n"96" -P: vmexit from the guest on pause\n"97" -p: pin 'vcpu' to 'hostcpu'\n"98#ifdef BHYVE_SNAPSHOT99" -r: path to checkpoint file\n"100#endif101" -S: guest memory cannot be swapped\n"102" -s: <slot,driver,configinfo> PCI slot config\n"103" -U: UUID\n"104" -u: RTC keeps UTC time\n"105" -W: force virtio to use single-vector MSI\n"106" -w: ignore unimplemented MSRs\n"107" -x: local APIC is in x2APIC mode\n"108" -Y: disable MPtable generation\n",109progname, (int)strlen(progname), "", (int)strlen(progname), "",110(int)strlen(progname), "");111exit(code);112}113114void115bhyve_optparse(int argc, char **argv)116{117const char *optstr;118int c;119120#ifdef BHYVE_SNAPSHOT121optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:";122#else123optstr = "aehuwxACDHIMPSWYk:f:o:p:G:c:s:m:n:l:K:U:";124#endif125while ((c = getopt(argc, argv, optstr)) != -1) {126switch (c) {127case 'a':128set_config_bool("x86.x2apic", false);129break;130case 'A':131/*132* NOP. For backward compatibility. Most systems don't133* work properly without sane ACPI tables. Therefore,134* we're always generating them.135*/136break;137case 'D':138set_config_bool("destroy_on_poweroff", true);139break;140case 'p':141if (bhyve_pincpu_parse(optarg) != 0) {142errx(EX_USAGE, "invalid vcpu pinning "143"configuration '%s'", optarg);144}145break;146case 'c':147if (bhyve_topology_parse(optarg) != 0) {148errx(EX_USAGE, "invalid cpu topology "149"'%s'", optarg);150}151break;152case 'C':153set_config_bool("memory.guest_in_core", true);154break;155case 'f':156if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {157errx(EX_USAGE, "invalid fwcfg item '%s'",158optarg);159}160break;161case 'G':162bhyve_parse_gdb_options(optarg);163break;164case 'k':165bhyve_parse_simple_config_file(optarg);166break;167case 'K':168set_config_value("keyboard.layout", optarg);169break;170case 'l':171if (strncmp(optarg, "help", strlen(optarg)) == 0) {172lpc_print_supported_devices();173exit(0);174} else if (lpc_device_parse(optarg) != 0) {175errx(EX_USAGE, "invalid lpc device "176"configuration '%s'", optarg);177}178break;179#ifdef BHYVE_SNAPSHOT180case 'r':181restore_file = optarg;182break;183#endif184case 's':185if (strncmp(optarg, "help", strlen(optarg)) == 0) {186pci_print_supported_devices();187exit(0);188} else if (pci_parse_slot(optarg) != 0)189exit(BHYVE_EXIT_ERROR);190else191break;192case 'S':193set_config_bool("memory.wired", true);194break;195case 'm':196set_config_value("memory.size", optarg);197break;198case 'M':199set_config_bool("monitor", true);200break;201case 'n':202if (bhyve_numa_parse(optarg) != 0)203errx(EX_USAGE,204"invalid NUMA configuration "205"'%s'",206optarg);207if (!get_config_bool("acpi_tables"))208errx(EX_USAGE, "NUMA emulation requires ACPI");209break;210case 'o':211if (!bhyve_parse_config_option(optarg)) {212errx(EX_USAGE,213"invalid configuration option '%s'",214optarg);215}216break;217case 'H':218set_config_bool("x86.vmexit_on_hlt", true);219break;220case 'I':221/*222* The "-I" option was used to add an ioapic to the223* virtual machine.224*225* An ioapic is now provided unconditionally for each226* virtual machine and this option is now deprecated.227*/228break;229case 'P':230set_config_bool("x86.vmexit_on_pause", true);231break;232case 'e':233set_config_bool("x86.strictio", true);234break;235case 'u':236set_config_bool("rtc.use_localtime", false);237break;238case 'U':239set_config_value("uuid", optarg);240break;241case 'w':242set_config_bool("x86.strictmsr", false);243break;244case 'W':245set_config_bool("virtio_msix", false);246break;247case 'x':248set_config_bool("x86.x2apic", true);249break;250case 'Y':251set_config_bool("x86.mptable", false);252break;253case 'h':254bhyve_usage(0);255default:256bhyve_usage(1);257}258}259260/* Handle backwards compatibility aliases in config options. */261if (get_config_value("lpc.bootrom") != NULL &&262get_config_value("bootrom") == NULL) {263warnx("lpc.bootrom is deprecated, use '-o bootrom' instead");264set_config_value("bootrom", get_config_value("lpc.bootrom"));265}266if (get_config_value("lpc.bootvars") != NULL &&267get_config_value("bootvars") == NULL) {268warnx("lpc.bootvars is deprecated, use '-o bootvars' instead");269set_config_value("bootvars", get_config_value("lpc.bootvars"));270}271}272273void274bhyve_init_vcpu(struct vcpu *vcpu)275{276int err, tmp;277278if (get_config_bool_default("x86.vmexit_on_hlt", false)) {279err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp);280if (err < 0) {281EPRINTLN("VM exit on HLT not supported");282exit(BHYVE_EXIT_ERROR);283}284vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1);285}286287if (get_config_bool_default("x86.vmexit_on_pause", false)) {288/*289* pause exit support required for this mode290*/291err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp);292if (err < 0) {293EPRINTLN("SMP mux requested, no pause support");294exit(BHYVE_EXIT_ERROR);295}296vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1);297}298299if (get_config_bool_default("x86.x2apic", false))300err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED);301else302err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED);303304if (err) {305EPRINTLN("Unable to set x2apic state (%d)", err);306exit(BHYVE_EXIT_ERROR);307}308309vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1);310311err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1);312assert(err == 0);313}314315void316bhyve_start_vcpu(struct vcpu *vcpu, bool bsp)317{318int error;319320if (bsp) {321if (bootrom_boot()) {322error = vm_set_capability(vcpu,323VM_CAP_UNRESTRICTED_GUEST, 1);324if (error != 0) {325err(4, "ROM boot failed: unrestricted guest "326"capability not available");327}328error = vcpu_reset(vcpu);329assert(error == 0);330}331} else {332bhyve_init_vcpu(vcpu);333334/*335* Enable the 'unrestricted guest' mode for APs.336*337* APs startup in power-on 16-bit mode.338*/339error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);340assert(error == 0);341}342343fbsdrun_addcpu(vcpu_id(vcpu));344}345346int347bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp __unused)348{349int error;350351error = init_msr();352if (error != 0)353return (error);354init_inout();355kernemu_dev_init();356atkbdc_init(ctx);357pci_irq_init(ctx);358ioapic_init(ctx);359rtc_init(ctx);360sci_init(ctx);361error = e820_init(ctx);362if (error != 0)363return (error);364error = bootrom_loadrom(ctx);365if (error != 0)366return (error);367368return (0);369}370371int372bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp __unused)373{374int error;375376if (get_config_bool_default("x86.mptable", true)) {377error = mptable_build(ctx, guest_ncpus);378if (error != 0)379return (error);380}381error = smbios_build(ctx);382if (error != 0)383return (error);384error = e820_finalize();385if (error != 0)386return (error);387388if (bootrom_boot() && strcmp(lpc_fwcfg(), "bhyve") == 0)389fwctl_init();390391if (get_config_bool("acpi_tables")) {392error = acpi_build(ctx, guest_ncpus);393assert(error == 0);394}395396return (0);397}398399400