Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.sbin/bhyve/bhyverun.c
106842 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2011 NetApp, Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/types.h>
30
#ifndef WITHOUT_CAPSICUM
31
#include <sys/capsicum.h>
32
#endif
33
#include <sys/cpuset.h>
34
#include <sys/domainset.h>
35
#include <sys/mman.h>
36
#ifdef BHYVE_SNAPSHOT
37
#include <sys/socket.h>
38
#include <sys/stat.h>
39
#endif
40
#include <sys/time.h>
41
#ifdef BHYVE_SNAPSHOT
42
#include <sys/un.h>
43
#endif
44
#include <sys/wait.h>
45
46
#include <machine/atomic.h>
47
48
#ifndef WITHOUT_CAPSICUM
49
#include <capsicum_helpers.h>
50
#endif
51
#include <stdio.h>
52
#include <stdlib.h>
53
#include <string.h>
54
#include <err.h>
55
#include <errno.h>
56
#ifdef BHYVE_SNAPSHOT
57
#include <fcntl.h>
58
#endif
59
#include <libgen.h>
60
#include <libutil.h>
61
#include <unistd.h>
62
#include <assert.h>
63
#include <pthread.h>
64
#include <pthread_np.h>
65
#include <sysexits.h>
66
#include <stdbool.h>
67
#include <stdint.h>
68
#ifdef BHYVE_SNAPSHOT
69
#include <ucl.h>
70
#include <unistd.h>
71
72
#include <libxo/xo.h>
73
#endif
74
75
#include <dev/vmm/vmm_mem.h>
76
#include <vmmapi.h>
77
78
#include "acpi.h"
79
#include "bhyverun.h"
80
#include "bootrom.h"
81
#include "config.h"
82
#include "debug.h"
83
#ifdef BHYVE_GDB
84
#include "gdb.h"
85
#endif
86
#include "mem.h"
87
#include "mevent.h"
88
#include "pci_emul.h"
89
#ifdef __amd64__
90
#include "amd64/pci_lpc.h"
91
#endif
92
#include "qemu_fwcfg.h"
93
#ifdef BHYVE_SNAPSHOT
94
#include "snapshot.h"
95
#endif
96
#include "tpm_device.h"
97
#include "vmgenc.h"
98
#include "vmexit.h"
99
100
/* Byte-size multipliers. */
#define MB (1024UL * 1024)
#define GB (1024UL * MB)

/* Number of guest vCPUs; computed by calc_topology(). */
int guest_ncpus;
/* Guest CPU topology; computed by calc_topology(), passed to vm_set_topology(). */
uint16_t cpu_cores, cpu_sockets, cpu_threads;

int raw_stdio = 0;

#ifdef BHYVE_SNAPSHOT
/* Checkpoint file to restore from; main() treats NULL as "no restore". */
char *restore_file;
#endif

/* vCPU id of the bootstrap processor. */
static const int BSP = 0;

/* vCPUs added by fbsdrun_addcpu() and not yet removed by fbsdrun_deletecpu(). */
static cpuset_t cpumask;

/* Guest memory domains, filled in by calc_mem_affinity(). */
static struct vm_mem_domain guest_domains[VM_MAXMEMDOM];
static int guest_ndomains = 0;

static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu);

/* Per-vCPU state; the array is allocated in main() with guest_ncpus entries. */
static struct vcpu_info {
	struct vmctx	*ctx;
	struct vcpu	*vcpu;
	int		vcpuid;
} *vcpu_info;

/*
 * Per-vCPU host CPU sets built by build_vcpumaps(); an entry is NULL when
 * no "vcpu.N.cpuset" value is configured for that vCPU.
 */
static cpuset_t **vcpumap;
128
129
/*
 * Parse a CPU topology option string of comma-separated tokens and store
 * the result in the configuration under "cpus", "sockets", "cores" and
 * "threads".  A token without '=' is taken as the value of "cpus".
 *
 * XXX This parser is known to have the following issues:
 * 1.  It accepts null key=value tokens ",," as setting "cpus" to an
 *     empty string.
 *
 * The acceptance of a null specification ('-c ""') is by design to match the
 * manual page syntax specification, this results in a topology of 1 vCPU.
 *
 * Returns 0 on success, or -1 when a token contains '=' but does not start
 * with one of the recognised keys.  Values stored before the bad token was
 * reached are left in place.
 */
int
bhyve_topology_parse(const char *opt)
{
	static const char *const known_keys[] = {
		"cpus", "sockets", "cores", "threads"
	};
	const size_t nkeys = sizeof(known_keys) / sizeof(known_keys[0]);
	char *copy, *cursor, *tok;
	size_t k, klen;
	int rc;

	if (opt[0] == '\0') {
		set_config_value("sockets", "1");
		set_config_value("cores", "1");
		set_config_value("threads", "1");
		set_config_value("cpus", "1");
		return (0);
	}

	copy = strdup(opt);
	if (copy == NULL)
		errx(4, "Failed to allocate memory");
	cursor = copy;

	rc = 0;
	while (rc == 0 && (tok = strsep(&cursor, ",")) != NULL) {
		/* Look for a "<key>=" prefix among the known keys. */
		for (k = 0; k < nkeys; k++) {
			klen = strlen(known_keys[k]);
			if (strncmp(tok, known_keys[k], klen) == 0 &&
			    tok[klen] == '=')
				break;
		}

		if (k < nkeys)
			set_config_value(known_keys[k], tok + klen + 1);
		else if (strchr(tok, '=') != NULL)
			rc = -1;	/* unknown key=value pair */
		else
			set_config_value("cpus", tok);
	}

	free(copy);
	return (rc);
}
175
176
/*
 * Convert 'value' to an integer in the inclusive range [minval, maxval].
 * The base is auto-detected by strtol() (decimal, 0x hex, leading-0 octal).
 * 'key' is only used in the diagnostic.  Any malformed, empty or
 * out-of-range input terminates the process via errx(4, ...).
 */
static int
parse_int_value(const char *key, const char *value, int minval, int maxval)
{
	char *end;
	long parsed;

	errno = 0;
	parsed = strtol(value, &end, 0);

	/* Conversion must succeed and consume the whole, non-empty string. */
	if (errno != 0 || end == value || *end != '\0')
		errx(4, "Invalid value for %s: '%s'", key, value);
	if (parsed < minval || parsed > maxval)
		errx(4, "Invalid value for %s: '%s'", key, value);

	return ((int)parsed);
}
189
190
int
191
bhyve_numa_parse(const char *opt)
192
{
193
int id = -1;
194
nvlist_t *nvl;
195
char *cp, *str, *tofree;
196
char pathbuf[64] = { 0 };
197
char *size = NULL, *cpus = NULL, *domain_policy = NULL;
198
199
if (*opt == '\0') {
200
return (-1);
201
}
202
203
tofree = str = strdup(opt);
204
if (str == NULL)
205
errx(4, "Failed to allocate memory");
206
207
while ((cp = strsep(&str, ",")) != NULL) {
208
if (strncmp(cp, "id=", strlen("id=")) == 0)
209
id = parse_int_value("id", cp + strlen("id="), 0,
210
UINT8_MAX);
211
else if (strncmp(cp, "size=", strlen("size=")) == 0)
212
size = cp + strlen("size=");
213
else if (strncmp(cp,
214
"domain_policy=", strlen("domain_policy=")) == 0)
215
domain_policy = cp + strlen("domain_policy=");
216
else if (strncmp(cp, "cpus=", strlen("cpus=")) == 0)
217
cpus = cp + strlen("cpus=");
218
}
219
220
if (id == -1) {
221
EPRINTLN("Missing NUMA domain ID in '%s'", opt);
222
goto out;
223
}
224
225
snprintf(pathbuf, sizeof(pathbuf), "domains.%d", id);
226
nvl = find_config_node(pathbuf);
227
if (nvl == NULL)
228
nvl = create_config_node(pathbuf);
229
if (size != NULL)
230
set_config_value_node(nvl, "size", size);
231
if (domain_policy != NULL)
232
set_config_value_node(nvl, "domain_policy", domain_policy);
233
if (cpus != NULL)
234
set_config_value_node(nvl, "cpus", cpus);
235
236
free(tofree);
237
return (0);
238
239
out:
240
free(tofree);
241
return (-1);
242
}
243
244
static void
245
calc_mem_affinity(size_t vm_memsize)
246
{
247
int i;
248
nvlist_t *nvl;
249
bool need_recalc;
250
const char *value;
251
struct vm_mem_domain *dom;
252
char pathbuf[64] = { 0 };
253
254
need_recalc = false;
255
for (i = 0; i < VM_MAXMEMDOM; i++) {
256
dom = &guest_domains[i];
257
snprintf(pathbuf, sizeof(pathbuf), "domains.%d", i);
258
nvl = find_config_node(pathbuf);
259
if (nvl == NULL) {
260
break;
261
}
262
263
value = get_config_value_node(nvl, "size");
264
need_recalc |= value == NULL;
265
if (value != NULL && vm_parse_memsize(value, &dom->size)) {
266
errx(EX_USAGE, "invalid memsize for domain %d: '%s'", i,
267
value);
268
}
269
270
dom->ds_mask = calloc(1, sizeof(domainset_t));
271
if (dom->ds_mask == NULL) {
272
errx(EX_OSERR, "Failed to allocate domainset mask");
273
}
274
dom->ds_size = sizeof(domainset_t);
275
value = get_config_value_node(nvl, "domain_policy");
276
if (value == NULL) {
277
dom->ds_policy = DOMAINSET_POLICY_INVALID;
278
DOMAINSET_ZERO(dom->ds_mask);
279
} else if (domainset_parselist(value, dom->ds_mask, &dom->ds_policy) !=
280
CPUSET_PARSE_OK) {
281
errx(EX_USAGE, "failed to parse domain policy '%s'", value);
282
}
283
}
284
285
guest_ndomains = i;
286
if (guest_ndomains == 0) {
287
/*
288
* No domains were specified - create domain
289
* 0 holding all CPUs and memory.
290
*/
291
guest_ndomains = 1;
292
guest_domains[0].size = vm_memsize;
293
} else if (need_recalc) {
294
warnx("At least one domain memory size was not specified, distributing"
295
" total VM memory size across all domains");
296
for (i = 0; i < guest_ndomains; i++) {
297
guest_domains[i].size = vm_memsize / guest_ndomains;
298
}
299
}
300
}
301
302
/*
303
* Set the sockets, cores, threads, and guest_cpus variables based on
304
* the configured topology.
305
*
306
* The limits of UINT16_MAX are due to the types passed to
307
* vm_set_topology(). vmm.ko may enforce tighter limits.
308
*/
309
static void
310
calc_topology(void)
311
{
312
const char *value;
313
bool explicit_cpus;
314
uint64_t ncpus;
315
316
value = get_config_value("cpus");
317
if (value != NULL) {
318
guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX);
319
explicit_cpus = true;
320
} else {
321
guest_ncpus = 1;
322
explicit_cpus = false;
323
}
324
value = get_config_value("cores");
325
if (value != NULL)
326
cpu_cores = parse_int_value("cores", value, 1, UINT16_MAX);
327
else
328
cpu_cores = 1;
329
value = get_config_value("threads");
330
if (value != NULL)
331
cpu_threads = parse_int_value("threads", value, 1, UINT16_MAX);
332
else
333
cpu_threads = 1;
334
value = get_config_value("sockets");
335
if (value != NULL)
336
cpu_sockets = parse_int_value("sockets", value, 1, UINT16_MAX);
337
else
338
cpu_sockets = guest_ncpus;
339
340
/*
341
* Compute sockets * cores * threads avoiding overflow. The
342
* range check above insures these are 16 bit values.
343
*/
344
ncpus = (uint64_t)cpu_sockets * cpu_cores * cpu_threads;
345
if (ncpus > UINT16_MAX)
346
errx(4, "Computed number of vCPUs too high: %ju",
347
(uintmax_t)ncpus);
348
349
if (explicit_cpus) {
350
if (guest_ncpus != (int)ncpus)
351
errx(4, "Topology (%d sockets, %d cores, %d threads) "
352
"does not match %d vCPUs",
353
cpu_sockets, cpu_cores, cpu_threads,
354
guest_ncpus);
355
} else
356
guest_ncpus = ncpus;
357
}
358
359
int
360
bhyve_pincpu_parse(const char *opt)
361
{
362
const char *value;
363
char *newval;
364
char key[16];
365
int vcpu, pcpu;
366
367
if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
368
fprintf(stderr, "invalid format: %s\n", opt);
369
return (-1);
370
}
371
372
if (vcpu < 0) {
373
fprintf(stderr, "invalid vcpu '%d'\n", vcpu);
374
return (-1);
375
}
376
377
if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
378
fprintf(stderr, "hostcpu '%d' outside valid range from "
379
"0 to %d\n", pcpu, CPU_SETSIZE - 1);
380
return (-1);
381
}
382
383
snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu);
384
value = get_config_value(key);
385
386
if (asprintf(&newval, "%s%s%d", value != NULL ? value : "",
387
value != NULL ? "," : "", pcpu) == -1) {
388
perror("failed to build new cpuset string");
389
return (-1);
390
}
391
392
set_config_value(key, newval);
393
free(newval);
394
return (0);
395
}
396
397
static void
398
parse_cpuset(int vcpu, const char *list, cpuset_t *set)
399
{
400
char *cp, *token;
401
int pcpu, start;
402
403
CPU_ZERO(set);
404
start = -1;
405
token = __DECONST(char *, list);
406
for (;;) {
407
pcpu = strtoul(token, &cp, 0);
408
if (cp == token)
409
errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list);
410
if (pcpu < 0 || pcpu >= CPU_SETSIZE)
411
errx(4, "hostcpu '%d' outside valid range from 0 to %d",
412
pcpu, CPU_SETSIZE - 1);
413
switch (*cp) {
414
case ',':
415
case '\0':
416
if (start >= 0) {
417
if (start > pcpu)
418
errx(4, "Invalid hostcpu range %d-%d",
419
start, pcpu);
420
while (start < pcpu) {
421
CPU_SET(start, set);
422
start++;
423
}
424
start = -1;
425
}
426
CPU_SET(pcpu, set);
427
break;
428
case '-':
429
if (start >= 0)
430
errx(4, "invalid cpuset for vcpu %d: '%s'",
431
vcpu, list);
432
start = pcpu;
433
break;
434
default:
435
errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list);
436
}
437
if (*cp == '\0')
438
break;
439
token = cp + 1;
440
}
441
}
442
443
static void
444
build_vcpumaps(void)
445
{
446
char key[16];
447
const char *value;
448
int vcpu;
449
450
vcpumap = calloc(guest_ncpus, sizeof(*vcpumap));
451
for (vcpu = 0; vcpu < guest_ncpus; vcpu++) {
452
snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu);
453
value = get_config_value(key);
454
if (value == NULL)
455
continue;
456
vcpumap[vcpu] = malloc(sizeof(cpuset_t));
457
if (vcpumap[vcpu] == NULL)
458
err(4, "Failed to allocate cpuset for vcpu %d", vcpu);
459
parse_cpuset(vcpu, value, vcpumap[vcpu]);
460
}
461
}
462
463
static void
464
set_vcpu_affinities(void)
465
{
466
int cpu, error;
467
nvlist_t *nvl = NULL;
468
cpuset_t cpus;
469
const char *value;
470
char pathbuf[64] = { 0 };
471
472
for (int dom = 0; dom < guest_ndomains; dom++) {
473
snprintf(pathbuf, sizeof(pathbuf), "domains.%d", dom);
474
nvl = find_config_node(pathbuf);
475
if (nvl == NULL)
476
break;
477
478
value = get_config_value_node(nvl, "cpus");
479
if (value == NULL) {
480
EPRINTLN("Missing CPU set for domain %d", dom);
481
exit(BHYVE_EXIT_ERROR);
482
}
483
484
parse_cpuset(dom, value, &cpus);
485
CPU_FOREACH_ISSET(cpu, &cpus) {
486
error = acpi_add_vcpu_affinity(cpu, dom);
487
if (error) {
488
EPRINTLN(
489
"Unable to set vCPU %d affinity for domain %d: %s",
490
cpu, dom, strerror(errno));
491
exit(BHYVE_EXIT_ERROR);
492
}
493
}
494
}
495
if (guest_ndomains > 1 || nvl != NULL)
496
return;
497
498
/*
499
* If we're dealing with one domain and no cpuset was provided, create a
500
* default one holding all cpus.
501
*/
502
for (cpu = 0; cpu < guest_ncpus; cpu++) {
503
error = acpi_add_vcpu_affinity(cpu, 0);
504
if (error) {
505
EPRINTLN(
506
"Unable to set vCPU %d affinity for domain %d: %s",
507
cpu, 0, strerror(errno));
508
exit(BHYVE_EXIT_ERROR);
509
}
510
}
511
}
512
513
/*
 * Translate the guest physical range [gaddr, gaddr + len) to a host
 * pointer; thin wrapper that returns whatever vm_map_gpa() returns.
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
	void *hostaddr;

	hostaddr = vm_map_gpa(ctx, gaddr, len);
	return (hostaddr);
}
519
520
#ifdef BHYVE_SNAPSHOT
521
/*
 * Translate host pointer 'addr' back to a guest physical address; thin
 * wrapper that returns whatever vm_rev_map_gpa() returns.
 */
uintptr_t
paddr_host2guest(struct vmctx *ctx, void *addr)
{
	uintptr_t gpa;

	gpa = vm_rev_map_gpa(ctx, addr);
	return (gpa);
}
526
#endif
527
528
/*
 * Return the "virtio_msix" configuration boolean (non-zero when set),
 * treating an unset value as true.
 */
int
fbsdrun_virtio_msix(void)
{
	bool use_msix;

	use_msix = get_config_bool_default("virtio_msix", true);
	return (use_msix);
}
534
535
struct vcpu *
536
fbsdrun_vcpu(int vcpuid)
537
{
538
return (vcpu_info[vcpuid].vcpu);
539
}
540
541
static void *
542
fbsdrun_start_thread(void *param)
543
{
544
char tname[MAXCOMLEN + 1];
545
struct vcpu_info *vi = param;
546
int error;
547
548
snprintf(tname, sizeof(tname), "vcpu %d", vi->vcpuid);
549
pthread_set_name_np(pthread_self(), tname);
550
551
if (vcpumap[vi->vcpuid] != NULL) {
552
error = pthread_setaffinity_np(pthread_self(),
553
sizeof(cpuset_t), vcpumap[vi->vcpuid]);
554
assert(error == 0);
555
}
556
557
#ifdef BHYVE_SNAPSHOT
558
checkpoint_cpu_add(vi->vcpuid);
559
#endif
560
#ifdef BHYVE_GDB
561
gdb_cpu_add(vi->vcpu);
562
#endif
563
564
vm_loop(vi->ctx, vi->vcpu);
565
/* We get here if the VM was destroyed asynchronously. */
566
exit(BHYVE_EXIT_ERROR);
567
}
568
569
void
570
fbsdrun_addcpu(int vcpuid)
571
{
572
struct vcpu_info *vi;
573
pthread_t thr;
574
int error;
575
576
vi = &vcpu_info[vcpuid];
577
578
error = vm_activate_cpu(vi->vcpu);
579
if (error != 0)
580
err(EX_OSERR, "could not activate CPU %d", vi->vcpuid);
581
582
CPU_SET_ATOMIC(vcpuid, &cpumask);
583
584
error = vm_suspend_cpu(vi->vcpu);
585
assert(error == 0);
586
587
error = pthread_create(&thr, NULL, fbsdrun_start_thread, vi);
588
assert(error == 0);
589
}
590
591
void
592
fbsdrun_deletecpu(int vcpu)
593
{
594
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
595
static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
596
597
pthread_mutex_lock(&resetcpu_mtx);
598
if (!CPU_ISSET(vcpu, &cpumask)) {
599
EPRINTLN("Attempting to delete unknown cpu %d", vcpu);
600
exit(BHYVE_EXIT_ERROR);
601
}
602
603
CPU_CLR(vcpu, &cpumask);
604
605
if (vcpu != BSP) {
606
pthread_cond_signal(&resetcpu_cond);
607
pthread_mutex_unlock(&resetcpu_mtx);
608
pthread_exit(NULL);
609
/* NOTREACHED */
610
}
611
612
while (!CPU_EMPTY(&cpumask)) {
613
pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
614
}
615
pthread_mutex_unlock(&resetcpu_mtx);
616
}
617
618
int
619
fbsdrun_suspendcpu(int vcpuid)
620
{
621
return (vm_suspend_cpu(vcpu_info[vcpuid].vcpu));
622
}
623
624
static void
625
vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
626
{
627
struct vm_exit vme;
628
struct vm_run vmrun;
629
int error, rc;
630
enum vm_exitcode exitcode;
631
cpuset_t active_cpus, dmask;
632
633
error = vm_active_cpus(ctx, &active_cpus);
634
assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus));
635
636
vmrun.vm_exit = &vme;
637
vmrun.cpuset = &dmask;
638
vmrun.cpusetsize = sizeof(dmask);
639
640
while (1) {
641
error = vm_run(vcpu, &vmrun);
642
if (error != 0)
643
break;
644
645
exitcode = vme.exitcode;
646
if (exitcode >= VM_EXITCODE_MAX ||
647
vmexit_handlers[exitcode] == NULL) {
648
warnx("vm_loop: unexpected exitcode 0x%x", exitcode);
649
exit(BHYVE_EXIT_ERROR);
650
}
651
652
rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vmrun);
653
654
switch (rc) {
655
case VMEXIT_CONTINUE:
656
break;
657
case VMEXIT_ABORT:
658
abort();
659
default:
660
exit(BHYVE_EXIT_ERROR);
661
}
662
}
663
EPRINTLN("vm_run error %d, errno %d", error, errno);
664
}
665
666
static int
667
num_vcpus_allowed(struct vmctx *ctx, struct vcpu *vcpu)
668
{
669
uint16_t sockets, cores, threads, maxcpus;
670
int tmp, error;
671
672
/*
673
* The guest is allowed to spinup more than one processor only if the
674
* UNRESTRICTED_GUEST capability is available.
675
*/
676
error = vm_get_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp);
677
if (error != 0)
678
return (1);
679
680
error = vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus);
681
if (error == 0)
682
return (maxcpus);
683
else
684
return (1);
685
}
686
687
static struct vmctx *
688
do_open(const char *vmname)
689
{
690
struct vmctx *ctx;
691
int error, flags;
692
bool romboot, monitor;
693
694
monitor = get_config_bool_default("monitor", false);
695
romboot = bootrom_boot();
696
697
/*
698
* If we don't have a boot ROM, the guest context must have been
699
* initialized by bhyveload(8) or equivalent.
700
*/
701
ctx = vm_openf(vmname, romboot ? VMMAPI_OPEN_REINIT : 0);
702
if (ctx == NULL) {
703
if (errno != ENOENT)
704
err(4, "vm_openf");
705
if (!romboot)
706
errx(4, "no bootrom was configured");
707
flags = VMMAPI_OPEN_CREATE;
708
if (monitor)
709
flags |= VMMAPI_OPEN_CREATE_DESTROY_ON_CLOSE;
710
ctx = vm_openf(vmname, flags);
711
if (ctx == NULL)
712
err(4, "vm_openf");
713
}
714
715
#ifndef WITHOUT_CAPSICUM
716
if (vm_limit_rights(ctx) != 0)
717
err(EX_OSERR, "vm_limit_rights");
718
#endif
719
720
error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0);
721
if (error)
722
errx(EX_OSERR, "vm_set_topology");
723
return (ctx);
724
}
725
726
/*
 * Parse a single "path=value" option and store it in the configuration.
 * The split happens at the first '='.  Returns false, storing nothing,
 * when there is no '=' or the value part is empty; returns true otherwise.
 * An allocation failure terminates the process.
 */
bool
bhyve_parse_config_option(const char *option)
{
	const char *eq;
	char *key;

	eq = strchr(option, '=');
	if (eq == NULL)
		return (false);
	if (eq[1] == '\0')
		return (false);

	key = strndup(option, eq - option);
	if (key == NULL)
		err(4, "Failed to allocate memory");

	set_config_value(key, eq + 1);
	free(key);
	return (true);
}
742
743
/*
 * Read the configuration file at 'path' line by line and apply every line
 * as a "path=value" option via bhyve_parse_config_option().  Lines whose
 * first character is '#' or a newline are skipped.  Failure to open the
 * file, or a line that is not a valid option, terminates the process; the
 * diagnostic for a bad line carries its 1-based line number.
 */
void
bhyve_parse_simple_config_file(const char *path)
{
	FILE *fp;
	char *buf = NULL;
	size_t bufcap = 0;
	unsigned int lineno = 0;

	fp = fopen(path, "r");
	if (fp == NULL)
		err(4, "Failed to open configuration file %s", path);

	while (getline(&buf, &bufcap, fp) > 0) {
		lineno++;
		if (buf[0] == '#' || buf[0] == '\n')
			continue;

		/* Cut the line at its newline, if it has one. */
		buf[strcspn(buf, "\n")] = '\0';

		if (!bhyve_parse_config_option(buf))
			errx(4, "%s line %u: invalid config option '%s'", path,
			    lineno, buf);
	}

	free(buf);
	fclose(fp);
}
770
771
#ifdef BHYVE_GDB
772
/*
 * Parse a gdb option of the form "[w][address:]port".
 *
 * A leading 'w' sets "gdb.wait" to true.  If the remainder contains a ':',
 * the text before the last ':' is stored as "gdb.address" and the text
 * after it as "gdb.port"; otherwise the whole remainder is the port.
 *
 * The address is copied before being stored so that the caller's string,
 * which is passed as const, is never written to.  An allocation failure
 * terminates the process.
 */
void
bhyve_parse_gdb_options(const char *opt)
{
	const char *colon, *sport;
	char *addr;

	if (opt[0] == 'w') {
		set_config_bool("gdb.wait", true);
		opt++;
	}

	colon = strrchr(opt, ':');
	if (colon == NULL) {
		sport = opt;
	} else {
		addr = strndup(opt, colon - opt);
		if (addr == NULL)
			err(4, "Failed to allocate memory");
		set_config_value("gdb.address", addr);
		free(addr);
		sport = colon + 1;
	}

	set_config_value("gdb.port", sport);
}
795
#endif
796
797
int
798
main(int argc, char *argv[])
799
{
800
int error, status;
801
int max_vcpus, memflags;
802
struct vcpu *bsp;
803
struct vmctx *ctx;
804
size_t memsize;
805
const char *value, *vmname;
806
#ifdef BHYVE_SNAPSHOT
807
struct restore_state rstate;
808
#endif
809
810
bhyve_init_config();
811
bhyve_optparse(argc, argv);
812
argc -= optind;
813
argv += optind;
814
815
if (argc > 1)
816
bhyve_usage(1);
817
818
#ifdef BHYVE_SNAPSHOT
819
if (restore_file != NULL) {
820
error = load_restore_file(restore_file, &rstate);
821
if (error) {
822
fprintf(stderr, "Failed to read checkpoint info from "
823
"file: '%s'.\n", restore_file);
824
exit(BHYVE_EXIT_ERROR);
825
}
826
vmname = lookup_vmname(&rstate);
827
if (vmname != NULL)
828
set_config_value("name", vmname);
829
}
830
#endif
831
832
if (argc == 1)
833
set_config_value("name", argv[0]);
834
835
vmname = get_config_value("name");
836
if (vmname == NULL)
837
bhyve_usage(1);
838
839
if (get_config_bool_default("config.dump", false)) {
840
dump_config();
841
exit(BHYVE_EXIT_POWEROFF);
842
}
843
844
calc_topology();
845
build_vcpumaps();
846
847
value = get_config_value("memory.size");
848
error = vm_parse_memsize(value, &memsize);
849
if (error)
850
errx(EX_USAGE, "invalid memsize '%s'", value);
851
852
ctx = do_open(vmname);
853
854
#ifdef BHYVE_SNAPSHOT
855
if (restore_file != NULL) {
856
guest_ncpus = lookup_guest_ncpus(&rstate);
857
memflags = lookup_memflags(&rstate);
858
memsize = lookup_memsize(&rstate);
859
}
860
861
if (guest_ncpus < 1) {
862
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
863
exit(BHYVE_EXIT_ERROR);
864
}
865
#endif
866
867
calc_mem_affinity(memsize);
868
memflags = 0;
869
if (get_config_bool_default("memory.wired", false))
870
memflags |= VM_MEM_F_WIRED;
871
if (get_config_bool_default("memory.guest_in_core", false))
872
memflags |= VM_MEM_F_INCORE;
873
vm_set_memflags(ctx, memflags);
874
error = vm_setup_memory_domains(ctx, VM_MMAP_ALL, guest_domains,
875
guest_ndomains);
876
if (error) {
877
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
878
exit(BHYVE_EXIT_ERROR);
879
}
880
881
set_vcpu_affinities();
882
init_mem(guest_ncpus);
883
init_bootrom(ctx);
884
885
if (get_config_bool_default("monitor", false)) {
886
while (1) {
887
pid_t child = fork();
888
if (child == -1) {
889
EPRINTLN("Monitor mode fork failed: %s",
890
strerror(errno));
891
exit(BHYVE_EXIT_ERROR);
892
}
893
if (child == 0)
894
break;
895
while ((error = waitpid(child, &status, 0)) == -1 && errno == EINTR)
896
;
897
if (error == -1) {
898
EPRINTLN("Monitor mode wait failed: %s",
899
strerror(errno));
900
exit(BHYVE_EXIT_ERROR);
901
}
902
if (WIFSIGNALED(status)) {
903
EPRINTLN("Child process was killed by signal %d",
904
WTERMSIG(status));
905
exit(BHYVE_EXIT_ERROR);
906
} else {
907
status = WEXITSTATUS(status);
908
if (status != BHYVE_EXIT_RESET)
909
exit(status);
910
}
911
if (vm_reinit(ctx) != 0) {
912
EPRINTLN("Monitor mode reinit failed: %s",
913
strerror(errno));
914
exit(BHYVE_EXIT_ERROR);
915
};
916
}
917
}
918
919
bsp = vm_vcpu_open(ctx, BSP);
920
max_vcpus = num_vcpus_allowed(ctx, bsp);
921
if (guest_ncpus > max_vcpus) {
922
fprintf(stderr, "%d vCPUs requested but only %d available\n",
923
guest_ncpus, max_vcpus);
924
exit(BHYVE_EXIT_ERROR);
925
}
926
927
bhyve_init_vcpu(bsp);
928
929
/* Allocate per-VCPU resources. */
930
vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info));
931
for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) {
932
vcpu_info[vcpuid].ctx = ctx;
933
vcpu_info[vcpuid].vcpuid = vcpuid;
934
if (vcpuid == BSP)
935
vcpu_info[vcpuid].vcpu = bsp;
936
else
937
vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid);
938
}
939
940
if (bhyve_init_platform(ctx, bsp) != 0)
941
exit(BHYVE_EXIT_ERROR);
942
943
if (qemu_fwcfg_init(ctx) != 0) {
944
fprintf(stderr, "qemu fwcfg initialization error\n");
945
exit(BHYVE_EXIT_ERROR);
946
}
947
948
if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus),
949
&guest_ncpus) != 0) {
950
fprintf(stderr, "Could not add qemu fwcfg opt/bhyve/hw.ncpu\n");
951
exit(BHYVE_EXIT_ERROR);
952
}
953
954
/*
955
* Exit if a device emulation finds an error in its initialization
956
*/
957
if (init_pci(ctx) != 0) {
958
EPRINTLN("Device emulation initialization error: %s",
959
strerror(errno));
960
exit(BHYVE_EXIT_ERROR);
961
}
962
if (init_tpm(ctx) != 0) {
963
EPRINTLN("Failed to init TPM device");
964
exit(BHYVE_EXIT_ERROR);
965
}
966
967
/*
968
* Initialize after PCI, to allow a bootrom file to reserve the high
969
* region.
970
*/
971
if (get_config_bool("acpi_tables"))
972
vmgenc_init(ctx);
973
974
#ifdef BHYVE_GDB
975
init_gdb(ctx);
976
#endif
977
978
/*
979
* Add all vCPUs.
980
*/
981
for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++)
982
bhyve_start_vcpu(vcpu_info[vcpuid].vcpu, vcpuid == BSP);
983
984
#ifdef BHYVE_SNAPSHOT
985
if (restore_file != NULL) {
986
FPRINTLN(stdout, "Pausing pci devs...");
987
if (vm_pause_devices() != 0) {
988
EPRINTLN("Failed to pause PCI device state.");
989
exit(BHYVE_EXIT_ERROR);
990
}
991
992
FPRINTLN(stdout, "Restoring vm mem...");
993
if (restore_vm_mem(ctx, &rstate) != 0) {
994
EPRINTLN("Failed to restore VM memory.");
995
exit(BHYVE_EXIT_ERROR);
996
}
997
998
FPRINTLN(stdout, "Restoring pci devs...");
999
if (vm_restore_devices(&rstate) != 0) {
1000
EPRINTLN("Failed to restore PCI device state.");
1001
exit(BHYVE_EXIT_ERROR);
1002
}
1003
1004
FPRINTLN(stdout, "Restoring kernel structs...");
1005
if (vm_restore_kern_structs(ctx, &rstate) != 0) {
1006
EPRINTLN("Failed to restore kernel structs.");
1007
exit(BHYVE_EXIT_ERROR);
1008
}
1009
1010
FPRINTLN(stdout, "Resuming pci devs...");
1011
if (vm_resume_devices() != 0) {
1012
EPRINTLN("Failed to resume PCI device state.");
1013
exit(BHYVE_EXIT_ERROR);
1014
}
1015
}
1016
#endif
1017
1018
if (bhyve_init_platform_late(ctx, bsp) != 0)
1019
exit(BHYVE_EXIT_ERROR);
1020
1021
/*
1022
* Change the proc title to include the VM name.
1023
*/
1024
setproctitle("%s", vmname);
1025
1026
#ifdef BHYVE_SNAPSHOT
1027
/*
1028
* checkpointing thread for communication with bhyvectl
1029
*/
1030
if (init_checkpoint_thread(ctx) != 0)
1031
errx(EX_OSERR, "Failed to start checkpoint thread");
1032
#endif
1033
1034
#ifndef WITHOUT_CAPSICUM
1035
caph_cache_catpages();
1036
1037
if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
1038
errx(EX_OSERR, "Unable to apply rights for sandbox");
1039
1040
if (caph_enter() == -1)
1041
errx(EX_OSERR, "cap_enter() failed");
1042
#endif
1043
1044
#ifdef BHYVE_SNAPSHOT
1045
if (restore_file != NULL) {
1046
destroy_restore_state(&rstate);
1047
if (vm_restore_time(ctx) < 0)
1048
err(EX_OSERR, "Unable to restore time");
1049
1050
for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++)
1051
vm_resume_cpu(vcpu_info[vcpuid].vcpu);
1052
} else
1053
#endif
1054
vm_resume_cpu(bsp);
1055
1056
/*
1057
* Head off to the main event dispatch loop
1058
*/
1059
mevent_dispatch();
1060
1061
exit(BHYVE_EXIT_ERROR);
1062
}
1063
1064