/* FreeBSD source: sys/dev/acpica/acpi_pxm.c (freebsd/freebsd-src, main branch) */
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2010 Hudson River Trading LLC
5
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
6
* All rights reserved.
7
*
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
10
* are met:
11
* 1. Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
16
*
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
* SUCH DAMAGE.
28
*/
29
30
#include <sys/cdefs.h>
31
#include "opt_vm.h"
32
33
#include <sys/param.h>
34
#include <sys/systm.h>
35
#include <sys/bus.h>
36
#include <sys/kernel.h>
37
#include <sys/lock.h>
38
#include <sys/mutex.h>
39
#include <sys/smp.h>
40
#include <sys/vmmeter.h>
41
#include <vm/vm.h>
42
#include <vm/pmap.h>
43
#include <vm/vm_param.h>
44
#include <vm/vm_page.h>
45
#include <vm/vm_phys.h>
46
47
#include <contrib/dev/acpica/include/acpi.h>
48
#include <contrib/dev/acpica/include/aclocal.h>
49
#include <contrib/dev/acpica/include/actables.h>
50
51
#include <machine/md_var.h>
52
53
#include <dev/acpica/acpivar.h>
54
55
#if MAXMEMDOM > 1
56
static struct cpu_info {
57
bool enabled:1;
58
bool has_memory:1;
59
int domain;
60
int id;
61
} *cpus;
62
63
static int max_cpus;
64
static int last_cpu;
65
66
struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
67
int num_mem;
68
69
static ACPI_TABLE_SRAT *srat;
70
static vm_paddr_t srat_physaddr;
71
72
static int domain_pxm[MAXMEMDOM];
73
static int ndomain;
74
static vm_paddr_t maxphyaddr;
75
76
static ACPI_TABLE_SLIT *slit;
77
static vm_paddr_t slit_physaddr;
78
static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
79
80
static void srat_walk_table(acpi_subtable_handler *handler, void *arg);
81
82
/*
83
* SLIT parsing.
84
*/
85
86
/*
 * Convert a mapped SLIT into the packed vm_locality_table[].  Entries
 * whose PXM has no VM domain (acpi_map_pxm_to_vm_domainid() < 0) are
 * skipped, so the output is indexed by VM domain, not by PXM.
 */
static void
slit_parse_table(ACPI_TABLE_SLIT *s)
{
	int i, j;
	int i_domain, j_domain;
	int offset = 0;		/* next free slot in vm_locality_table[] */
	uint8_t e;

	/*
	 * This maps the SLIT data into the VM-domain centric view.
	 * There may be sparse entries in the PXM namespace, so
	 * remap them to a VM-domain ID and if it doesn't exist,
	 * skip it.
	 *
	 * It should result in a packed 2d array of VM-domain
	 * locality information entries.
	 */

	if (bootverbose)
		printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
	for (i = 0; i < s->LocalityCount; i++) {
		i_domain = acpi_map_pxm_to_vm_domainid(i);
		if (i_domain < 0)
			continue;

		if (bootverbose)
			printf("%d: ", i);
		for (j = 0; j < s->LocalityCount; j++) {
			j_domain = acpi_map_pxm_to_vm_domainid(j);
			if (j_domain < 0)
				continue;
			/* SLIT is a row-major LocalityCount^2 matrix. */
			e = s->Entry[i * s->LocalityCount + j];
			if (bootverbose)
				printf("%d ", (int) e);
			/* 255 == "no locality information" */
			if (e == 255)
				vm_locality_table[offset] = -1;
			else
				vm_locality_table[offset] = e;
			offset++;
		}
		if (bootverbose)
			printf("\n");
	}
}
131
132
/*
133
* Look for an ACPI System Locality Distance Information Table ("SLIT")
134
*/
135
static int
136
parse_slit(void)
137
{
138
139
if (resource_disabled("slit", 0)) {
140
return (-1);
141
}
142
143
slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
144
if (slit_physaddr == 0) {
145
return (-1);
146
}
147
148
/*
149
* Make a pass over the table to populate the cpus[] and
150
* mem_info[] tables.
151
*/
152
slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
153
slit_parse_table(slit);
154
acpi_unmap_table(slit);
155
slit = NULL;
156
157
return (0);
158
}
159
160
/*
161
* SRAT parsing.
162
*/
163
164
/*
165
* Returns true if a memory range overlaps with at least one range in
166
* phys_avail[].
167
*/
168
static int
169
overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
170
{
171
int i;
172
173
for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
174
if (phys_avail[i + 1] <= start)
175
continue;
176
if (phys_avail[i] < end)
177
return (1);
178
break;
179
}
180
return (0);
181
}
182
183
/*
184
* On x86 we can use the cpuid to index the cpus array, but on arm64
185
* we have an ACPI Processor UID with a larger range.
186
*
187
* Use this variable to indicate if the cpus can be stored by index.
188
*/
189
#ifdef __aarch64__
190
static const int cpus_use_indexing = 0;
191
#else
192
static const int cpus_use_indexing = 1;
193
#endif
194
195
/*
196
* Find CPU by processor ID (APIC ID on x86, Processor UID on arm64)
197
*/
198
static struct cpu_info *
199
cpu_find(int cpuid)
200
{
201
int i;
202
203
if (cpus_use_indexing) {
204
if (cpuid <= last_cpu && cpus[cpuid].enabled)
205
return (&cpus[cpuid]);
206
} else {
207
for (i = 0; i <= last_cpu; i++)
208
if (cpus[i].id == cpuid)
209
return (&cpus[i]);
210
}
211
return (NULL);
212
}
213
214
/*
215
* Find CPU by pcpu pointer.
216
*/
217
/*
 * Find the SRAT cpu_info record for a pcpu.  Panics if the pcpu's
 * hardware id was never seen in the SRAT.
 */
static struct cpu_info *
cpu_get_info(struct pcpu *pc)
{
	struct cpu_info *cpup;
	int id;

	/* arm64 keys on the ACPI Processor UID, x86 on the local APIC ID. */
#ifdef __aarch64__
	id = pc->pc_acpi_id;
#else
	id = pc->pc_apic_id;
#endif
	cpup = cpu_find(id);
	if (cpup == NULL)
		panic("SRAT: CPU with ID %u is not known", id);
	return (cpup);
}
233
234
/*
235
* Add proximity information for a new CPU.
236
*/
237
static struct cpu_info *
238
cpu_add(int cpuid, int domain)
239
{
240
struct cpu_info *cpup;
241
242
if (cpus_use_indexing) {
243
if (cpuid >= max_cpus)
244
return (NULL);
245
last_cpu = imax(last_cpu, cpuid);
246
cpup = &cpus[cpuid];
247
} else {
248
if (last_cpu >= max_cpus - 1)
249
return (NULL);
250
cpup = &cpus[++last_cpu];
251
}
252
cpup->domain = domain;
253
cpup->id = cpuid;
254
cpup->enabled = 1;
255
return (cpup);
256
}
257
258
/*
 * acpi_subtable_handler for the SRAT: record CPU-to-domain affinity for
 * local APIC / x2APIC / GICC entries, and accumulate memory ranges into
 * the sorted mem_info[] array.  On a hard error, *(int *)arg is set to
 * ENXIO so the caller can abort SRAT processing.
 */
static void
srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
{
	ACPI_SRAT_CPU_AFFINITY *cpu;
	ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
	ACPI_SRAT_MEM_AFFINITY *mem;
	ACPI_SRAT_GICC_AFFINITY *gicc;
	/*
	 * NOTE(review): 'static' looks unnecessary here — the pointer is
	 * fully reassigned before each use and never carries state across
	 * calls; confirm whether a plain local was intended.
	 */
	static struct cpu_info *cpup;
	uint64_t base, length;
	int domain, i, slot;

	switch (entry->Type) {
	case ACPI_SRAT_TYPE_CPU_AFFINITY:
		cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
		/* The 32-bit proximity domain is split across two fields. */
		domain = cpu->ProximityDomainLo |
		    cpu->ProximityDomainHi[0] << 8 |
		    cpu->ProximityDomainHi[1] << 16 |
		    cpu->ProximityDomainHi[2] << 24;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    cpu->ApicId, domain,
			    (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		/* A duplicate APIC ID is a fatal table inconsistency. */
		cpup = cpu_find(cpu->ApicId);
		if (cpup != NULL) {
			printf("SRAT: Duplicate local APIC ID %u\n",
			    cpu->ApicId);
			*(int *)arg = ENXIO;
			break;
		}
		cpup = cpu_add(cpu->ApicId, domain);
		if (cpup == NULL)
			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
			    cpu->ApicId);
		break;
	case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
		x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
		if (bootverbose)
			printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
			    x2apic->ApicId, x2apic->ProximityDomain,
			    (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
			    "enabled" : "disabled");
		if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
			break;
		/* Unlike the APIC case, duplicates here only assert. */
		KASSERT(cpu_find(x2apic->ApicId) == NULL,
		    ("Duplicate local APIC ID %u", x2apic->ApicId));
		cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain);
		if (cpup == NULL)
			printf("SRAT: Ignoring local APIC ID %u (too high)\n",
			    x2apic->ApicId);
		break;
	case ACPI_SRAT_TYPE_GICC_AFFINITY:
		gicc = (ACPI_SRAT_GICC_AFFINITY *)entry;
		if (bootverbose)
			printf("SRAT: Found CPU UID %u domain %d: %s\n",
			    gicc->AcpiProcessorUid, gicc->ProximityDomain,
			    (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ?
			    "enabled" : "disabled");
		if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED))
			break;
		KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL,
		    ("Duplicate CPU UID %u", gicc->AcpiProcessorUid));
		cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain);
		if (cpup == NULL)
			printf("SRAT: Ignoring CPU UID %u (too high)\n",
			    gicc->AcpiProcessorUid);
		break;
	case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
		mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
		base = mem->BaseAddress;
		length = mem->Length;
		domain = mem->ProximityDomain;

		if (bootverbose)
			printf(
		    "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n",
			    domain, (uintmax_t)base, (uintmax_t)length,
			    (mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
			    "enabled" : "disabled");
		if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
			break;
		/* Drop ranges above maxphyaddr or outside phys_avail[]. */
		if (base >= maxphyaddr ||
		    !overlaps_phys_avail(base, base + length)) {
			printf("SRAT: Ignoring memory at addr 0x%jx\n",
			    (uintmax_t)base);
			break;
		}
		if (num_mem == VM_PHYSSEG_MAX) {
			printf("SRAT: Too many memory regions\n");
			*(int *)arg = ENXIO;
			break;
		}
		/*
		 * mem_info[] is kept sorted by address; find the insertion
		 * slot, coalescing with an adjacent same-domain segment
		 * when possible and rejecting overlaps.
		 */
		slot = num_mem;
		for (i = 0; i < num_mem; i++) {
			if (mem_info[i].domain == domain) {
				/* Try to extend an existing segment. */
				if (base == mem_info[i].end) {
					mem_info[i].end += length;
					return;
				}
				if (base + length == mem_info[i].start) {
					mem_info[i].start -= length;
					return;
				}
			}
			if (mem_info[i].end <= base)
				continue;
			if (mem_info[i].start < base + length) {
				printf("SRAT: Overlapping memory entries\n");
				*(int *)arg = ENXIO;
				return;
			}
			slot = i;
		}
		/* Shift later entries up and insert the new segment. */
		for (i = num_mem; i > slot; i--)
			mem_info[i] = mem_info[i - 1];
		mem_info[slot].start = base;
		mem_info[slot].end = base + length;
		mem_info[slot].domain = domain;
		num_mem++;
		break;
	}
}
383
384
/*
385
* Ensure each memory domain has at least one CPU and that each CPU
386
* has at least one memory domain.
387
*/
388
static int
check_domains(void)
{
	int found, i, j;

	/*
	 * Pass 1: every memory domain must have at least one enabled CPU;
	 * mark those CPUs as having memory along the way.
	 */
	for (i = 0; i < num_mem; i++) {
		found = 0;
		for (j = 0; j <= last_cpu; j++)
			if (cpus[j].enabled &&
			    cpus[j].domain == mem_info[i].domain) {
				cpus[j].has_memory = 1;
				found++;
			}
		if (!found) {
			printf("SRAT: No CPU found for memory domain %d\n",
			    mem_info[i].domain);
			return (ENXIO);
		}
	}
	/*
	 * Pass 2: a CPU whose domain has no memory gets an empty
	 * (start == end == 0) mem_info[] placeholder so the domain
	 * still exists for renumbering.
	 */
	for (i = 0; i <= last_cpu; i++)
		if (cpus[i].enabled && !cpus[i].has_memory) {
			found = 0;
			for (j = 0; j < num_mem && !found; j++) {
				if (mem_info[j].domain == cpus[i].domain)
					found = 1;
			}
			if (!found) {
				if (bootverbose)
					printf("SRAT: mem dom %d is empty\n",
					    cpus[i].domain);
				mem_info[num_mem].start = 0;
				mem_info[num_mem].end = 0;
				mem_info[num_mem].domain = cpus[i].domain;
				num_mem++;
			}
		}
	return (0);
}
426
427
/*
428
* Check that the SRAT memory regions cover all of the regions in
429
* phys_avail[].
430
*/
431
static int
check_phys_avail(void)
{
	vm_paddr_t address;	/* first address not yet covered */
	int i, j;

	/* j is the current offset into phys_avail[]. */
	address = phys_avail[0];
	j = 0;
	/* mem_info[] is sorted by address (see srat_parse_entry()). */
	for (i = 0; i < num_mem; i++) {
		/*
		 * Consume as many phys_avail[] entries as fit in this
		 * region.
		 */
		while (address >= mem_info[i].start &&
		    address <= mem_info[i].end) {
			/*
			 * If we cover the rest of this phys_avail[] entry,
			 * advance to the next entry.
			 */
			if (phys_avail[j + 1] <= mem_info[i].end) {
				j += 2;
				/* All of phys_avail[] accounted for. */
				if (phys_avail[j] == 0 &&
				    phys_avail[j + 1] == 0) {
					return (0);
				}
				address = phys_avail[j];
			} else
				address = mem_info[i].end + 1;
		}
	}
	/* Some phys_avail[] range was not covered by any SRAT region. */
	printf("SRAT: No memory region found for 0x%jx - 0x%jx\n",
	    (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
	return (ENXIO);
}
466
467
/*
468
* Renumber the memory domains to be compact and zero-based if not
469
* already. Returns an error if there are too many domains.
470
*/
471
static int
renumber_domains(void)
{
	int i, j, slot;

	/*
	 * Enumerate all the domains: build domain_pxm[] as a sorted,
	 * duplicate-free list of the PXM values seen in mem_info[].
	 */
	ndomain = 0;
	for (i = 0; i < num_mem; i++) {
		/* See if this domain is already known. */
		for (j = 0; j < ndomain; j++) {
			/* List is sorted, so stop at the insertion point. */
			if (domain_pxm[j] >= mem_info[i].domain)
				break;
		}
		if (j < ndomain && domain_pxm[j] == mem_info[i].domain)
			continue;

		if (ndomain >= MAXMEMDOM) {
			/* Fall back to a single domain. */
			ndomain = 1;
			printf("SRAT: Too many memory domains\n");
			return (EFBIG);
		}

		/* Insert the new domain at slot 'j'. */
		slot = j;
		for (j = ndomain; j > slot; j--)
			domain_pxm[j] = domain_pxm[j - 1];
		domain_pxm[slot] = mem_info[i].domain;
		ndomain++;
	}

	/* Renumber each domain to its index in the sorted 'domain_pxm' list. */
	for (i = 0; i < ndomain; i++) {
		/*
		 * If the domain is already the right value, no need
		 * to renumber.
		 */
		if (domain_pxm[i] == i)
			continue;

		/* Walk the cpu[] and mem_info[] arrays to renumber. */
		for (j = 0; j < num_mem; j++)
			if (mem_info[j].domain == domain_pxm[i])
				mem_info[j].domain = i;
		for (j = 0; j <= last_cpu; j++)
			if (cpus[j].enabled && cpus[j].domain == domain_pxm[i])
				cpus[j].domain = i;
	}

	return (0);
}
521
522
/*
523
* Look for an ACPI System Resource Affinity Table ("SRAT"),
524
* allocate space for cpu information, and initialize globals.
525
*/
526
int
acpi_pxm_init(int ncpus, vm_paddr_t maxphys)
{
	unsigned int idx, size;
	vm_paddr_t addr;

	/* Honor hint-based disabling of SRAT processing. */
	if (resource_disabled("srat", 0))
		return (-1);

	max_cpus = ncpus;
	last_cpu = -1;
	maxphyaddr = maxphys;
	srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
	if (srat_physaddr == 0)
		return (-1);

	/*
	 * Allocate data structure:
	 *
	 * Find the last physical memory region and steal some memory from
	 * it. This is done because at this point in the boot process
	 * malloc is still not usable.
	 */
	for (idx = 0; phys_avail[idx + 1] != 0; idx += 2);
	KASSERT(idx != 0, ("phys_avail is empty!"));
	idx -= 2;

	/* Carve page-aligned space for cpus[] off the end of the region. */
	size = sizeof(*cpus) * max_cpus;
	addr = trunc_page(phys_avail[idx + 1] - size);
	KASSERT(addr >= phys_avail[idx],
	    ("Not enough memory for SRAT table items"));
	phys_avail[idx + 1] = addr - 1;

	/*
	 * We cannot rely on PHYS_TO_DMAP because this code is also used in
	 * i386, so use pmap_mapbios to map the memory, this will end up using
	 * the default memory attribute (WB), and the DMAP when available.
	 */
	cpus = (struct cpu_info *)pmap_mapbios(addr, size);
	bzero(cpus, size);
	return (0);
}
568
569
/*
 * Walk the SRAT and validate the result.  On any failure the cached
 * srat_physaddr is cleared so later acpi_pxm_* calls become no-ops.
 */
static int
parse_srat(void)
{
	int error;

	/*
	 * Make a pass over the table to populate the cpus[] and
	 * mem_info[] tables.
	 */
	srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
	error = 0;
	srat_walk_table(srat_parse_entry, &error);
	acpi_unmap_table(srat);
	srat = NULL;
	if (error || check_domains() != 0 || check_phys_avail() != 0 ||
	    renumber_domains() != 0) {
		srat_physaddr = 0;
		return (-1);
	}

	return (0);
}
591
592
static void
593
init_mem_locality(void)
594
{
595
int i;
596
597
/*
598
* For now, assume -1 == "no locality information for
599
* this pairing.
600
*/
601
for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
602
vm_locality_table[i] = -1;
603
}
604
605
/*
606
* Parse SRAT and SLIT to save proximity info. Don't do
607
* anything if SRAT is not available.
608
*/
609
void
610
acpi_pxm_parse_tables(void)
611
{
612
613
if (srat_physaddr == 0)
614
return;
615
if (parse_srat() < 0)
616
return;
617
init_mem_locality();
618
(void)parse_slit();
619
}
620
621
/*
622
* Use saved data from SRAT/SLIT to update memory locality.
623
*/
624
void
625
acpi_pxm_set_mem_locality(void)
626
{
627
628
if (srat_physaddr == 0)
629
return;
630
vm_phys_register_domains(ndomain, mem_info, vm_locality_table);
631
}
632
633
/*
 * Invoke 'handler' on each SRAT subtable; 'srat' must be mapped.
 * Subtables start immediately after the fixed table header.
 */
static void
srat_walk_table(acpi_subtable_handler *handler, void *arg)
{

	acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
	    handler, arg);
}
640
641
/*
642
* Set up per-CPU domain IDs from information saved in 'cpus' and tear down data
643
* structures allocated by acpi_pxm_init().
644
*/
645
void
acpi_pxm_set_cpu_locality(void)
{
	struct cpu_info *cpu;
	struct pcpu *pc;
	u_int i;

	if (srat_physaddr == 0)
		return;
	/* Assign each present CPU to its SRAT domain (0 if single-domain). */
	for (i = 0; i < MAXCPU; i++) {
		if (CPU_ABSENT(i))
			continue;
		pc = pcpu_find(i);
		KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
		cpu = cpu_get_info(pc);
		pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0;
		CPU_SET(i, &cpuset_domain[pc->pc_domain]);
		if (bootverbose)
			printf("SRAT: CPU %u has memory domain %d\n", i,
			    pc->pc_domain);
	}
	/* XXXMJ the page is leaked. */
	pmap_unmapbios(cpus, sizeof(*cpus) * max_cpus);
	/* Tear down state so later acpi_pxm_* calls are no-ops. */
	srat_physaddr = 0;
	cpus = NULL;
}
671
672
int
673
acpi_pxm_get_cpu_locality(int apic_id)
674
{
675
struct cpu_info *cpu;
676
677
cpu = cpu_find(apic_id);
678
if (cpu == NULL)
679
panic("SRAT: CPU with ID %u is not known", apic_id);
680
return (cpu->domain);
681
}
682
683
/*
684
* Map a _PXM value to a VM domain ID.
685
*
686
* Returns the domain ID, or -1 if no domain ID was found.
687
*/
688
int
689
acpi_map_pxm_to_vm_domainid(int pxm)
690
{
691
int i;
692
693
for (i = 0; i < ndomain; i++) {
694
if (domain_pxm[i] == pxm)
695
return (vm_ndomains > 1 ? i : 0);
696
}
697
698
return (-1);
699
}
700
701
#else /* MAXMEMDOM == 1 */
702
703
/* Without NUMA support every _PXM value is unmapped. */
int
acpi_map_pxm_to_vm_domainid(int pxm)
{

	return (-1);
}
709
710
#endif /* MAXMEMDOM > 1 */
711
712