Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.sbin/bhyve/amd64/e820.c
105585 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
5
* Author: Corvin Köhne <[email protected]>
6
*/
7
8
#include <sys/types.h>
9
#include <sys/queue.h>
10
11
#include <machine/vmm.h>
12
13
#include <assert.h>
14
#include <err.h>
15
#include <errno.h>
16
#include <stdio.h>
17
#include <stdlib.h>
18
#include <string.h>
19
20
#include "debug.h"
21
#include "e820.h"
22
#include "qemu_fwcfg.h"
23
24
/*
25
* E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
26
* works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
27
* hold all possible physical addresses and we can get into trouble.
28
*/
29
static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
30
"Unable to represent physical memory by E820 table");
31
32
/* Name of the fwcfg file that carries the E820 table. */
#define	E820_FWCFG_FILE_NAME	"etc/e820"

/* Size helpers; each unit is 1024 times the previous one. */
#define	KB	(1024UL)
#define	MB	(1024 * KB)
#define	GB	(1024 * MB)

/*
 * Fixed E820 memory holes:
 *	[ A0000,  C0000) VGA
 *	[ C0000, 100000) ROM
 */
#define	E820_VGA_MEM_BASE	0xA0000
#define	E820_VGA_MEM_END	0xC0000
#define	E820_ROM_MEM_BASE	0xC0000
#define	E820_ROM_MEM_END	0x100000
47
48
struct e820_element {
49
TAILQ_ENTRY(e820_element) chain;
50
uint64_t base;
51
uint64_t end;
52
enum e820_memory_type type;
53
};
54
static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
55
e820_table);
56
57
static struct e820_element *
58
e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
59
{
60
struct e820_element *element;
61
62
element = calloc(1, sizeof(*element));
63
if (element == NULL) {
64
return (NULL);
65
}
66
67
element->base = base;
68
element->end = end;
69
element->type = type;
70
71
return (element);
72
}
73
74
static const char *
75
e820_get_type_name(const enum e820_memory_type type)
76
{
77
switch (type) {
78
case E820_TYPE_MEMORY:
79
return ("RAM");
80
case E820_TYPE_RESERVED:
81
return ("Reserved");
82
case E820_TYPE_ACPI:
83
return ("ACPI");
84
case E820_TYPE_NVS:
85
return ("NVS");
86
default:
87
return ("Unknown");
88
}
89
}
90
91
void
92
e820_dump_table(void)
93
{
94
struct e820_element *element;
95
uint64_t i;
96
97
EPRINTLN("E820 map:");
98
99
i = 0;
100
TAILQ_FOREACH(element, &e820_table, chain) {
101
EPRINTLN(" (%4lu) [%16lx, %16lx] %s", i,
102
element->base, element->end,
103
e820_get_type_name(element->type));
104
105
++i;
106
}
107
}
108
109
static struct qemu_fwcfg_item *
110
e820_get_fwcfg_item(void)
111
{
112
struct qemu_fwcfg_item *fwcfg_item;
113
struct e820_element *element;
114
struct e820_entry *entries;
115
int count, i;
116
117
count = 0;
118
TAILQ_FOREACH(element, &e820_table, chain) {
119
++count;
120
}
121
if (count == 0) {
122
warnx("%s: E820 table empty", __func__);
123
return (NULL);
124
}
125
126
fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
127
if (fwcfg_item == NULL) {
128
return (NULL);
129
}
130
131
fwcfg_item->size = count * sizeof(struct e820_entry);
132
fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
133
if (fwcfg_item->data == NULL) {
134
free(fwcfg_item);
135
return (NULL);
136
}
137
138
i = 0;
139
entries = (struct e820_entry *)fwcfg_item->data;
140
TAILQ_FOREACH(element, &e820_table, chain) {
141
struct e820_entry *entry = &entries[i];
142
143
entry->base = element->base;
144
entry->length = element->end - element->base;
145
entry->type = element->type;
146
147
++i;
148
}
149
150
return (fwcfg_item);
151
}
152
153
static int
154
e820_add_entry(const uint64_t base, const uint64_t end,
155
const enum e820_memory_type type)
156
{
157
struct e820_element *new_element;
158
struct e820_element *element;
159
struct e820_element *sib_element;
160
struct e820_element *ram_element;
161
162
assert(end >= base);
163
164
new_element = e820_element_alloc(base, end, type);
165
if (new_element == NULL) {
166
return (ENOMEM);
167
}
168
169
/*
170
* E820 table should always be sorted in ascending order. Therefore,
171
* search for a range whose end is larger than the base parameter.
172
*/
173
TAILQ_FOREACH(element, &e820_table, chain) {
174
if (element->end > base) {
175
break;
176
}
177
}
178
179
/*
180
* System memory requires special handling.
181
*/
182
if (type == E820_TYPE_MEMORY) {
183
/*
184
* base is larger than of any existing element. Add new system
185
* memory at the end of the table.
186
*/
187
if (element == NULL) {
188
TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
189
return (0);
190
}
191
192
/*
193
* System memory shouldn't overlap with any existing element.
194
*/
195
assert(end >= element->base);
196
197
TAILQ_INSERT_BEFORE(element, new_element, chain);
198
199
return (0);
200
}
201
202
/*
203
* If some one tries to allocate a specific address, it could happen, that
204
* this address is not allocatable. Therefore, do some checks. If the
205
* address is not allocatable, don't panic. The user may have a fallback and
206
* tries to allocate another address. This is true for the GVT-d emulation
207
* which tries to reuse the host address of the graphics stolen memory and
208
* falls back to allocating the highest address below 4 GB.
209
*/
210
if (element == NULL || element->type != E820_TYPE_MEMORY ||
211
(base < element->base || end > element->end))
212
return (ENOMEM);
213
214
if (base == element->base && end == element->end) {
215
/*
216
* The new entry replaces an existing one.
217
*
218
* Old table:
219
* [ 0x1000, 0x4000] RAM <-- element
220
* New table:
221
* [ 0x1000, 0x4000] Reserved
222
*/
223
TAILQ_INSERT_BEFORE(element, new_element, chain);
224
TAILQ_REMOVE(&e820_table, element, chain);
225
free(element);
226
} else if (base == element->base) {
227
/*
228
* New element at system memory base boundary. Add new
229
* element before current and adjust the base of the old
230
* element.
231
*
232
* Old table:
233
* [ 0x1000, 0x4000] RAM <-- element
234
* New table:
235
* [ 0x1000, 0x2000] Reserved
236
* [ 0x2000, 0x4000] RAM <-- element
237
*/
238
TAILQ_INSERT_BEFORE(element, new_element, chain);
239
element->base = end;
240
} else if (end == element->end) {
241
/*
242
* New element at system memory end boundary. Add new
243
* element after current and adjust the end of the
244
* current element.
245
*
246
* Old table:
247
* [ 0x1000, 0x4000] RAM <-- element
248
* New table:
249
* [ 0x1000, 0x3000] RAM <-- element
250
* [ 0x3000, 0x4000] Reserved
251
*/
252
TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
253
element->end = base;
254
} else {
255
/*
256
* New element inside system memory entry. Split it by
257
* adding a system memory element and the new element
258
* before current.
259
*
260
* Old table:
261
* [ 0x1000, 0x4000] RAM <-- element
262
* New table:
263
* [ 0x1000, 0x2000] RAM
264
* [ 0x2000, 0x3000] Reserved
265
* [ 0x3000, 0x4000] RAM <-- element
266
*/
267
ram_element = e820_element_alloc(element->base, base,
268
E820_TYPE_MEMORY);
269
if (ram_element == NULL) {
270
return (ENOMEM);
271
}
272
TAILQ_INSERT_BEFORE(element, ram_element, chain);
273
TAILQ_INSERT_BEFORE(element, new_element, chain);
274
element->base = end;
275
}
276
277
/*
278
* If the previous element has the same type and ends at our base
279
* boundary, we can merge both entries.
280
*/
281
sib_element = TAILQ_PREV(new_element, e820_table, chain);
282
if (sib_element != NULL &&
283
sib_element->type == new_element->type &&
284
sib_element->end == new_element->base) {
285
new_element->base = sib_element->base;
286
TAILQ_REMOVE(&e820_table, sib_element, chain);
287
free(sib_element);
288
}
289
290
/*
291
* If the next element has the same type and starts at our end
292
* boundary, we can merge both entries.
293
*/
294
sib_element = TAILQ_NEXT(new_element, chain);
295
if (sib_element != NULL &&
296
sib_element->type == new_element->type &&
297
sib_element->base == new_element->end) {
298
/* Merge new element into subsequent one. */
299
new_element->end = sib_element->end;
300
TAILQ_REMOVE(&e820_table, sib_element, chain);
301
free(sib_element);
302
}
303
304
return (0);
305
}
306
307
static int
308
e820_add_memory_hole(const uint64_t base, const uint64_t end)
309
{
310
struct e820_element *element;
311
struct e820_element *ram_element;
312
313
assert(end >= base);
314
315
/*
316
* E820 table should be always sorted in ascending order. Therefore,
317
* search for an element which end is larger than the base parameter.
318
*/
319
TAILQ_FOREACH(element, &e820_table, chain) {
320
if (element->end > base) {
321
break;
322
}
323
}
324
325
if (element == NULL || end <= element->base) {
326
/* Nothing to do. Hole already exists */
327
return (0);
328
}
329
330
/* Memory holes are only allowed in system memory */
331
assert(element->type == E820_TYPE_MEMORY);
332
333
if (base == element->base) {
334
/*
335
* New hole at system memory base boundary.
336
*
337
* Old table:
338
* [ 0x1000, 0x4000] RAM
339
* New table:
340
* [ 0x2000, 0x4000] RAM
341
*/
342
element->base = end;
343
} else if (end == element->end) {
344
/*
345
* New hole at system memory end boundary.
346
*
347
* Old table:
348
* [ 0x1000, 0x4000] RAM
349
* New table:
350
* [ 0x1000, 0x3000] RAM
351
*/
352
element->end = base;
353
} else {
354
/*
355
* New hole inside system memory entry. Split the system memory.
356
*
357
* Old table:
358
* [ 0x1000, 0x4000] RAM <-- element
359
* New table:
360
* [ 0x1000, 0x2000] RAM
361
* [ 0x3000, 0x4000] RAM <-- element
362
*/
363
ram_element = e820_element_alloc(element->base, base,
364
E820_TYPE_MEMORY);
365
if (ram_element == NULL) {
366
return (ENOMEM);
367
}
368
TAILQ_INSERT_BEFORE(element, ram_element, chain);
369
element->base = end;
370
}
371
372
return (0);
373
}
374
375
static uint64_t
376
e820_alloc_highest(const uint64_t max_address, const uint64_t length,
377
const uint64_t alignment, const enum e820_memory_type type)
378
{
379
struct e820_element *element;
380
381
TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
382
uint64_t address, base, end;
383
384
end = MIN(max_address, element->end);
385
base = roundup2(element->base, alignment);
386
387
/*
388
* If end - length == 0, we would allocate memory at address 0. This
389
* address is mostly unusable and we should avoid allocating it.
390
* Therefore, search for another block in that case.
391
*/
392
if (element->type != E820_TYPE_MEMORY || end < base ||
393
end - base < length || end - length == 0) {
394
continue;
395
}
396
397
address = rounddown2(end - length, alignment);
398
399
if (e820_add_entry(address, address + length, type) != 0) {
400
return (0);
401
}
402
403
return (address);
404
}
405
406
return (0);
407
}
408
409
static uint64_t
410
e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
411
const uint64_t alignment, const enum e820_memory_type type)
412
{
413
struct e820_element *element;
414
415
TAILQ_FOREACH(element, &e820_table, chain) {
416
uint64_t base, end;
417
418
end = element->end;
419
base = MAX(min_address, roundup2(element->base, alignment));
420
421
/*
422
* If base == 0, we would allocate memory at address 0. This
423
* address is mostly unusable and we should avoid allocating it.
424
* Therefore, search for another block in that case.
425
*/
426
if (element->type != E820_TYPE_MEMORY || end < base ||
427
end - base < length || base == 0) {
428
continue;
429
}
430
431
if (e820_add_entry(base, base + length, type) != 0) {
432
return (0);
433
}
434
435
return (base);
436
}
437
438
return (0);
439
}
440
441
uint64_t
442
e820_alloc(const uint64_t address, const uint64_t length,
443
const uint64_t alignment, const enum e820_memory_type type,
444
const enum e820_allocation_strategy strategy)
445
{
446
assert(powerof2(alignment));
447
assert((address & (alignment - 1)) == 0);
448
449
switch (strategy) {
450
case E820_ALLOCATE_ANY:
451
/*
452
* Allocate any address. Therefore, ignore the address parameter
453
* and reuse the code path for allocating the lowest address.
454
*/
455
return (e820_alloc_lowest(0, length, alignment, type));
456
case E820_ALLOCATE_LOWEST:
457
return (e820_alloc_lowest(address, length, alignment, type));
458
case E820_ALLOCATE_HIGHEST:
459
return (e820_alloc_highest(address, length, alignment, type));
460
case E820_ALLOCATE_SPECIFIC:
461
if (e820_add_entry(address, address + length, type) != 0) {
462
return (0);
463
}
464
465
return (address);
466
}
467
468
return (0);
469
}
470
471
int
472
e820_init(struct vmctx *const ctx)
473
{
474
uint64_t lowmem_size, highmem_size;
475
int error;
476
477
TAILQ_INIT(&e820_table);
478
479
lowmem_size = vm_get_lowmem_size(ctx);
480
error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
481
if (error) {
482
warnx("%s: Could not add lowmem", __func__);
483
return (error);
484
}
485
486
highmem_size = vm_get_highmem_size(ctx);
487
if (highmem_size != 0) {
488
error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
489
E820_TYPE_MEMORY);
490
if (error) {
491
warnx("%s: Could not add highmem", __func__);
492
return (error);
493
}
494
}
495
496
error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
497
if (error) {
498
warnx("%s: Could not add VGA memory", __func__);
499
return (error);
500
}
501
502
error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
503
if (error) {
504
warnx("%s: Could not add ROM area", __func__);
505
return (error);
506
}
507
508
return (0);
509
}
510
511
int
512
e820_finalize(void)
513
{
514
struct qemu_fwcfg_item *e820_fwcfg_item;
515
int error;
516
517
e820_fwcfg_item = e820_get_fwcfg_item();
518
if (e820_fwcfg_item == NULL) {
519
warnx("invalid e820 table");
520
return (ENOMEM);
521
}
522
error = qemu_fwcfg_add_file("etc/e820",
523
e820_fwcfg_item->size, e820_fwcfg_item->data);
524
if (error != 0) {
525
warnx("could not add qemu fwcfg etc/e820");
526
free(e820_fwcfg_item->data);
527
free(e820_fwcfg_item);
528
return (error);
529
}
530
free(e820_fwcfg_item);
531
532
return (0);
533
}
534
535