GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/topology.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU/APIC topology
 *
 * The APIC IDs describe the system topology in multiple domain levels.
 * The CPUID topology parser provides the information which part of the
 * APIC ID is associated with the individual levels:
 *
 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
 *
 * The root space contains the package (socket) IDs.
 *
 * Levels which are not enumerated consume 0 bits of space, but
 * conceptually they are always represented. If e.g. only the CORE and
 * THREAD levels are enumerated, then DIE, MODULE and TILE have the same
 * physical ID as the PACKAGE.
 *
 * If SMT is not supported, then the THREAD domain is still used. It then
 * has the same physical ID as the CORE domain and is the only child of
 * the core domain.
 *
 * This allows a unified view of the system independent of the enumerated
 * domain levels without requiring any conditionals in the code.
 */
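
/*
 * Example (hypothetical layout, assuming only the THREAD and CORE levels
 * are enumerated with shifts 1 and 5):
 *
 *	APIC ID 0x23 (binary 100011)
 *	  THREAD  = bit  [0]	-> 1
 *	  CORE    = bits [4:1]	-> 1
 *	  PACKAGE = bits [5..]	-> 1
 *
 * The non-enumerated domains (MODULE, TILE, DIE, DIEGRP) consume no bits
 * and therefore share the package ID.
 */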
#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>

#include <asm/apic.h>
#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/smp.h>

#include "cpu.h"

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);

/* Bitmap of physically present CPUs. */
DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;

/* Used for CPU number allocation and parallel CPU bringup */
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };

/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;

/*
 * Keep track of assigned, disabled and rejected CPUs. nr_assigned_cpus
 * is preset to 1 as CPU #0 is reserved for the boot CPU.
 */
static struct {
	unsigned int	nr_assigned_cpus;
	unsigned int	nr_disabled_cpus;
	unsigned int	nr_rejected_cpus;
	u32		boot_cpu_apic_id;
	u32		real_bsp_apic_id;
} topo_info __ro_after_init = {
	.nr_assigned_cpus	= 1,
	.boot_cpu_apic_id	= BAD_APICID,
	.real_bsp_apic_id	= BAD_APICID,
};

#define domain_weight(_dom)	bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)

bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == (u64)cpuid_to_apicid[cpu];
}

#ifdef CONFIG_SMP
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	if (!(apicid & (__max_threads_per_core - 1)))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
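
/*
 * Example: with __max_threads_per_core == 2 the low APIC ID bit selects
 * the thread, so APIC ID 0x10 (thread bit clear) is marked as the primary
 * thread of its core while APIC ID 0x11 is not.
 */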

/*
 * Convert the APIC ID to a domain level ID by masking out the low bits
 * below the domain level @dom.
 */
static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
{
	if (dom == TOPO_SMT_DOMAIN)
		return apicid;
	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
}
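
/*
 * Example (hypothetical shifts as above, so all domains above CORE share
 * a shift of 5): for @dom == TOPO_PKG_DOMAIN the mask is UINT_MAX << 5,
 * hence topo_apicid(0x23, TOPO_PKG_DOMAIN) yields 0x20, the package ID
 * portion of the APIC ID.
 */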

static int topo_lookup_cpuid(u32 apic_id)
{
	int i;

	/* CPU# to APICID mapping is persistent once it is established */
	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
		if (cpuid_to_apicid[i] == apic_id)
			return i;
	}
	return -ENODEV;
}

static __init int topo_get_cpunr(u32 apic_id)
{
	int cpu = topo_lookup_cpuid(apic_id);

	if (cpu >= 0)
		return cpu;

	return topo_info.nr_assigned_cpus++;
}

static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
{
#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
	set_cpu_present(cpu, true);
}

static __init bool check_for_real_bsp(u32 apic_id)
{
	bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
	u64 msr;

	/*
	 * There is no really good way to detect whether this is a kdump()
	 * kernel, but except on the Voyager SMP monstrosity, which is no
	 * longer supported, the real BSP APIC ID is the first one which is
	 * enumerated by firmware. That allows detecting whether the boot
	 * CPU is the real BSP. If it is not, then do not register the APIC
	 * because sending INIT to the real BSP would reset the whole
	 * system.
	 *
	 * The first APIC ID which is enumerated by firmware is detectable
	 * because the boot CPU APIC ID is registered before that without
	 * invoking this code.
	 */
	if (topo_info.real_bsp_apic_id != BAD_APICID)
		return false;

	/*
	 * Check whether the enumeration order is broken by evaluating the
	 * BSP bit in the APICBASE MSR. If the CPU does not have the
	 * APICBASE MSR, then BSP detection is not possible and the kernel
	 * must rely on the firmware enumeration order.
	 */
	if (has_apic_base) {
		rdmsrq(MSR_IA32_APICBASE, msr);
		is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
	}

	if (apic_id == topo_info.boot_cpu_apic_id) {
		/*
		 * If the boot CPU has the APIC BSP bit set, then the
		 * firmware enumeration agrees. If the CPU does not have
		 * the APICBASE MSR, then the only choice is to trust the
		 * enumeration order.
		 */
		if (is_bsp || !has_apic_base) {
			topo_info.real_bsp_apic_id = apic_id;
			return false;
		}
		/*
		 * If the boot APIC is enumerated first, but the APICBASE
		 * MSR does not have the BSP bit set, then there is no way
		 * to discover the real BSP here. Assume a crash kernel and
		 * limit the number of CPUs to 1 as an INIT to the real BSP
		 * would reset the machine.
		 */
		pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
		pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
		set_nr_cpu_ids(1);
		goto fwbug;
	}

	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
		topo_info.boot_cpu_apic_id, apic_id);

	if (is_bsp) {
		/*
		 * The boot CPU has the APIC BSP bit set. Use it and complain
		 * about the broken firmware enumeration.
		 */
		topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
		goto fwbug;
	}

	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");

	topo_info.real_bsp_apic_id = apic_id;
	return true;

fwbug:
	pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
	return false;
}
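
/*
 * Summary of check_for_real_bsp() for the first APIC registered after the
 * boot CPU APIC:
 *
 *	first enumerated	boot CPU BSP bit	action
 *	----------------	----------------	-------------------------
 *	boot CPU APIC		set or no MSR		record boot CPU as real BSP
 *	boot CPU APIC		clear			crash kernel, limit to 1 CPU
 *	other APIC		set			use boot CPU, flag FW bug
 *	other APIC		clear			crash kernel, reject real BSP
 */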

static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
				    unsigned long *map)
{
	unsigned int id, end, cnt = 0;

	/* Calculate the exclusive end */
	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);

	/* Unfortunately there is no bitmap_weight_range() */
	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
		cnt++;
	return cnt;
}
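
/*
 * Example: with a hypothetical dom_shifts[TOPO_PKG_DOMAIN] of 5, lvlid 0x20
 * yields end == 0x40, so the loop counts the set bits in @map within the
 * APIC ID range [0x20, 0x40), i.e. one package's worth of IDs.
 */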

static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	int cpu, dom;

	if (present) {
		set_bit(apic_id, phys_cpu_present_map);

		/*
		 * Double registration is valid in case of the boot CPU
		 * APIC because that is registered before the enumeration
		 * of the APICs via firmware parsers or VM guest
		 * mechanisms.
		 */
		if (apic_id == topo_info.boot_cpu_apic_id)
			cpu = 0;
		else
			cpu = topo_get_cpunr(apic_id);

		cpuid_to_apicid[cpu] = apic_id;
		topo_set_cpuids(cpu, apic_id, acpi_id);
	} else {
		u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);

		/*
		 * Check for present APICs in the same package when running
		 * on bare metal. Allow the bogosity in a guest.
		 */
		if (hypervisor_is_type(X86_HYPER_NATIVE) &&
		    topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
			pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
				     apic_id);
			topo_info.nr_rejected_cpus++;
			return;
		}

		topo_info.nr_disabled_cpus++;
	}

	/*
	 * Register present and possible CPUs in the domain
	 * maps. cpu_possible_map will be updated in
	 * topology_init_possible_cpus() after enumeration is done.
	 */
	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
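
/*
 * Example (hypothetical shifts 1 and 5 as above): registering APIC ID 0x23
 * sets bit 0x23 in the THREAD map, bit 0x22 in the CORE map, and bit 0x20
 * in the maps of all higher domains.
 */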

/**
 * topology_register_apic - Register an APIC in early topology maps
 * @apic_id:	The APIC ID to set up
 * @acpi_id:	The ACPI ID associated to the APIC
 * @present:	True if the corresponding CPU is present
 */
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	if (apic_id >= MAX_LOCAL_APIC) {
		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
		topo_info.nr_rejected_cpus++;
		return;
	}

	if (check_for_real_bsp(apic_id)) {
		topo_info.nr_rejected_cpus++;
		return;
	}

	/* CPU numbers exhausted? */
	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
		topo_info.nr_rejected_cpus++;
		return;
	}

	topo_register_apic(apic_id, acpi_id, present);
}

/**
 * topology_register_boot_apic - Register the boot CPU APIC
 * @apic_id:	The APIC ID to set up
 *
 * Separate so CPU #0 can be assigned
 */
void __init topology_register_boot_apic(u32 apic_id)
{
	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);

	topo_info.boot_cpu_apic_id = apic_id;
	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}

/**
 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
 * @apicid:	The APIC ID for which to lookup the logical ID
 * @at_level:	The topology domain level to use
 *
 * @apicid must be a full APIC ID, not the normalized variant. It is valid
 * for all bits below the domain level specified by @at_level to be clear.
 * So both real APIC IDs and backshifted normalized APIC IDs work correctly.
 *
 * Returns:
 *  - >= 0:	The requested logical ID
 *  - -ERANGE:	@apicid is out of range
 *  - -ENODEV:	@apicid is not registered
 */
int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return -ERANGE;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return -ENODEV;
	/* Get the number of set bits before @lvlid. */
	return bitmap_weight(apic_maps[at_level].map, lvlid);
}
EXPORT_SYMBOL_GPL(topology_get_logical_id);
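
/*
 * Example: if the package-level map has bits 0x00, 0x20 and 0x40 set
 * (hypothetical shift-5 layout), topology_get_logical_id(0x47, TOPO_PKG_DOMAIN)
 * masks 0x47 down to package ID 0x40 and returns 2, the number of
 * registered package IDs below it.
 */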

/**
 * topology_unit_count - Retrieve the count of specified units at a given topology domain level
 * @apicid:	The APIC ID which specifies the search range
 * @which_units: The domain level specifying the units to count
 * @at_level:	The domain level at which @which_units have to be counted
 *
 * This returns the number of possible units according to the enumerated
 * information.
 *
 * E.g. topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
 * counts the number of possible cores in the package to which @apicid
 * belongs.
 *
 * @at_level must obviously be greater than @which_units to produce useful
 * results. If @at_level is equal to @which_units the result is
 * unsurprisingly 1. If @at_level is less than @which_units the result is
 * by definition undefined and the function returns 0.
 */
unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
				 enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return 0;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return 0;
	if (which_units > at_level)
		return 0;
	if (which_units == at_level)
		return 1;
	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
}
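
/*
 * Example (hypothetical shift-5 layout, package 0x20 registered): if the
 * CORE map has bits 0x20, 0x22 and 0x28 set, then
 * topology_unit_count(0x23, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) masks 0x23
 * to package ID 0x20 and counts 3 cores in the range [0x20, 0x40).
 */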

#ifdef CONFIG_ACPI_HOTPLUG_CPU
/**
 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
 * @apic_id:	The APIC ID to set up
 * @acpi_id:	The ACPI ID associated to the APIC
 */
int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
{
	int cpu;

	if (apic_id >= MAX_LOCAL_APIC)
		return -EINVAL;

	/* Reject if the APIC ID was not registered during enumeration. */
	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
		return -ENODEV;

	cpu = topo_lookup_cpuid(apic_id);
	if (cpu < 0)
		return -ENOSPC;

	set_bit(apic_id, phys_cpu_present_map);
	topo_set_cpuids(cpu, apic_id, acpi_id);
	cpu_mark_primary_thread(cpu, apic_id);
	return cpu;
}

/**
 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
 * @cpu:	The CPU number for which the APIC ID is removed
 */
void topology_hotunplug_apic(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	if (apic_id == BAD_APICID)
		return;

	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
	clear_bit(apic_id, phys_cpu_present_map);
	set_cpu_present(cpu, false);
}
#endif

#ifdef CONFIG_X86_LOCAL_APIC
static unsigned int max_possible_cpus __initdata = NR_CPUS;

/**
 * topology_apply_cmdline_limits_early - Apply topology command line limits early
 *
 * Ensure that command line limits are in effect before firmware parsing
 * takes place.
 */
void __init topology_apply_cmdline_limits_early(void)
{
	unsigned int possible = nr_cpu_ids;

	/* 'maxcpus=0' 'nosmp' 'nolapic' */
	if (!setup_max_cpus || apic_is_disabled)
		possible = 1;

	/* 'possible_cpus=N' */
	possible = min_t(unsigned int, max_possible_cpus, possible);

	if (possible < nr_cpu_ids) {
		pr_info("Limiting to %u possible CPUs\n", possible);
		set_nr_cpu_ids(possible);
	}
}
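
/*
 * Example: booting with 'possible_cpus=4' on a kernel with nr_cpu_ids == 64
 * shrinks the possible set to 4 CPUs before any firmware tables are parsed.
 */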

static __init bool restrict_to_up(void)
{
	if (!smp_found_config)
		return true;
	/*
	 * XEN PV is special as it does not advertise the local APIC
	 * properly, but provides a fake topology for it so that the
	 * infrastructure works. So don't apply the restrictions vs. APIC
	 * here.
	 */
	if (xen_pv_domain())
		return false;

	return apic_is_disabled;
}

void __init topology_init_possible_cpus(void)
{
	unsigned int assigned = topo_info.nr_assigned_cpus;
	unsigned int disabled = topo_info.nr_disabled_cpus;
	unsigned int cnta, cntb, cpu, allowed = 1;
	unsigned int total = assigned + disabled;
	u32 apicid, firstid;

	/*
	 * If there was no APIC registered, then fake one so that the
	 * topology bitmap is populated. That ensures that the code below
	 * is valid and the various query interfaces can be used
	 * unconditionally. This does not affect the actual APIC code in
	 * any way because either the local APIC address has not been
	 * registered or the local APIC was disabled on the command line.
	 */
	if (topo_info.boot_cpu_apic_id == BAD_APICID)
		topology_register_boot_apic(0);

	if (!restrict_to_up()) {
		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
			disabled += assigned - nr_cpu_ids;
			assigned = nr_cpu_ids;
		}
		allowed = min_t(unsigned int, total, nr_cpu_ids);
	}

	if (total > allowed)
		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);

	assigned = min_t(unsigned int, allowed, assigned);
	disabled = allowed - assigned;

	topo_info.nr_assigned_cpus = assigned;
	topo_info.nr_disabled_cpus = disabled;

	total_cpus = allowed;
	set_nr_cpu_ids(allowed);

	cnta = domain_weight(TOPO_PKG_DOMAIN);
	cntb = domain_weight(TOPO_DIE_DOMAIN);
	__max_logical_packages = cnta;
	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));

	pr_info("Max. logical packages: %3u\n", cnta);
	pr_info("Max. logical dies: %3u\n", cntb);
	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);

	cnta = domain_weight(TOPO_CORE_DOMAIN);
	cntb = domain_weight(TOPO_SMT_DOMAIN);
	/*
	 * Can't use the order delta here as order(cnta) can be equal to
	 * order(cntb) even if cnta != cntb.
	 */
	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);

	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);

	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
	if (topo_info.nr_rejected_cpus)
		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	/* Assign CPU numbers to non-present CPUs */
	for (apicid = 0; disabled; disabled--, apicid++) {
		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
					      MAX_LOCAL_APIC, apicid);
		if (apicid >= MAX_LOCAL_APIC)
			break;
		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
	}
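
	/*
	 * Example: if APIC IDs 0x00 and 0x01 are present and 0x02 and 0x03
	 * are registered but disabled (hypothetical), the loop above hands
	 * the next free CPU numbers to 0x02 and 0x03 so they can be
	 * hotplugged later.
	 */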

	for (cpu = 0; cpu < allowed; cpu++) {
		apicid = cpuid_to_apicid[cpu];

		set_cpu_possible(cpu, true);

		if (apicid == BAD_APICID)
			continue;

		cpu_mark_primary_thread(cpu, apicid);
		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
	}
}

/*
 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
 */
void __init topology_reset_possible_cpus_up(void)
{
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
	if (topo_info.boot_cpu_apic_id != BAD_APICID)
		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
}

static int __init setup_possible_cpus(char *str)
{
	get_option(&str, &max_possible_cpus);
	return 0;
}
early_param("possible_cpus", setup_possible_cpus);
#endif