GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/topology.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU/APIC topology
 *
 * The APIC IDs describe the system topology in multiple domain levels.
 * The CPUID topology parser provides the information about which part of
 * the APIC ID is associated with the individual levels:
 *
 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
 *
 * The root space contains the package (socket) IDs.
 *
 * Levels which are not enumerated consume 0 bits of space, but
 * conceptually they are always represented. If e.g. only the CORE and
 * THREAD levels are enumerated, then the DIE, MODULE and TILE levels
 * have the same physical ID as the PACKAGE level.
 *
 * If SMT is not supported, then the THREAD domain is still used. It then
 * has the same physical ID as the CORE domain and is the only child of
 * the core domain.
 *
 * This allows a unified view of the system independent of the enumerated
 * domain levels without requiring any conditionals in the code.
 */
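
/*
 * Editorial example (hypothetical enumeration, not taken from real CPUID
 * data): assume the parser found 1 bit for the THREAD level and 4 bits
 * for the CORE level and nothing else. APIC ID 0x35 then maps to
 *
 *   THREAD level ID: 0x35
 *   CORE   level ID: 0x34  (thread bit masked out)
 *   MODULE/TILE/DIE/DIEGRP/PACKAGE level ID: 0x20
 *
 * because the non-enumerated levels consume 0 bits and therefore share
 * the physical ID of the package.
 */
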
#define pr_fmt(fmt) "CPU topo: " fmt
#include <linux/cpu.h>

#include <xen/xen.h>

#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/smp.h>

#include "cpu.h"

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);

/* Bitmap of physically present CPUs. */
DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;

/* Used for CPU number allocation and parallel CPU bringup */
u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };

/* Bitmaps to mark registered APICs at each topology domain */
static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;

/*
 * Keep track of assigned, disabled and rejected CPUs. nr_assigned_cpus
 * is preset to 1 because CPU #0 is reserved for the boot CPU.
 */
static struct {
	unsigned int nr_assigned_cpus;
	unsigned int nr_disabled_cpus;
	unsigned int nr_rejected_cpus;
	u32 boot_cpu_apic_id;
	u32 real_bsp_apic_id;
} topo_info __ro_after_init = {
	.nr_assigned_cpus = 1,
	.boot_cpu_apic_id = BAD_APICID,
	.real_bsp_apic_id = BAD_APICID,
};

#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)

bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == (u64)cpuid_to_apicid[cpu];
}

static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	if (!(apicid & (__max_threads_per_core - 1)))
		cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
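
/*
 * Editorial note: with e.g. __max_threads_per_core == 2 (one SMT bit,
 * hypothetical value) the mask above is 0x1, so APIC IDs 0x34, 0x36, ...
 * are marked as primary threads while 0x35, 0x37, ... are not.
 */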

/*
 * Convert the APIC ID to a domain level ID by masking out the low bits
 * below the domain level @dom.
 */
static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
{
	if (dom == TOPO_SMT_DOMAIN)
		return apicid;
	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
}
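
/*
 * Editorial example (hypothetical shift value): if the SMT domain
 * occupies 1 bit, i.e. x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN] == 1,
 * then topo_apicid(0x35, TOPO_CORE_DOMAIN) == 0x35 & (UINT_MAX << 1)
 * == 0x34, i.e. the thread bit is cleared and the core level ID remains.
 */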

static int topo_lookup_cpuid(u32 apic_id)
{
	int i;

	/* CPU# to APICID mapping is persistent once it is established */
	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
		if (cpuid_to_apicid[i] == apic_id)
			return i;
	}
	return -ENODEV;
}

static __init int topo_get_cpunr(u32 apic_id)
{
	int cpu = topo_lookup_cpuid(apic_id);

	if (cpu >= 0)
		return cpu;

	return topo_info.nr_assigned_cpus++;
}

static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
{
#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
	set_cpu_present(cpu, true);
}

static __init bool check_for_real_bsp(u32 apic_id)
{
	bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6;
	u64 msr;

	/*
	 * There is no really good way to detect whether this is a kdump
	 * kernel, but except on the Voyager SMP monstrosity, which is no
	 * longer supported, the real BSP APIC ID is the first one which is
	 * enumerated by firmware. That allows detecting whether the boot
	 * CPU is the real BSP. If it is not, then do not register the APIC
	 * because sending INIT to the real BSP would reset the whole
	 * system.
	 *
	 * The first APIC ID which is enumerated by firmware is detectable
	 * because the boot CPU APIC ID is registered before that without
	 * invoking this code.
	 */
	if (topo_info.real_bsp_apic_id != BAD_APICID)
		return false;

	/*
	 * Check whether the enumeration order is broken by evaluating the
	 * BSP bit in the APICBASE MSR. If the CPU does not have the
	 * APICBASE MSR then the BSP detection is not possible and the
	 * kernel must rely on the firmware enumeration order.
	 */
	if (has_apic_base) {
		rdmsrq(MSR_IA32_APICBASE, msr);
		is_bsp = !!(msr & MSR_IA32_APICBASE_BSP);
	}

	if (apic_id == topo_info.boot_cpu_apic_id) {
		/*
		 * If the boot CPU has the APIC BSP bit set then the
		 * firmware enumeration agrees. If the CPU does not
		 * have the APICBASE MSR then the only choice is to trust
		 * the enumeration order.
		 */
		if (is_bsp || !has_apic_base) {
			topo_info.real_bsp_apic_id = apic_id;
			return false;
		}
		/*
		 * If the boot APIC is enumerated first, but the APICBASE
		 * MSR does not have the BSP bit set, then there is no way
		 * to discover the real BSP here. Assume a crash kernel and
		 * limit the number of CPUs to 1 as an INIT to the real BSP
		 * would reset the machine.
		 */
		pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id);
		pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n");
		set_nr_cpu_ids(1);
		goto fwbug;
	}

	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n",
		topo_info.boot_cpu_apic_id, apic_id);

	if (is_bsp) {
		/*
		 * The boot CPU has the APIC BSP bit set. Use it and complain
		 * about the broken firmware enumeration.
		 */
		topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id;
		goto fwbug;
	}

	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");

	topo_info.real_bsp_apic_id = apic_id;
	return true;

fwbug:
	pr_warn(FW_BUG "APIC enumeration order not specification compliant\n");
	return false;
}
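
/*
 * Editorial summary of the cases handled above (added annotation, not part
 * of the original code). "BSP bit" refers to the APICBASE MSR of the CPU
 * the kernel is currently booting on:
 *
 *  - first enumerated APIC is the boot CPU and the BSP bit is set (or
 *    there is no APICBASE MSR): the boot CPU is the real BSP.
 *  - first enumerated APIC is the boot CPU but the BSP bit is clear:
 *    assume a crash kernel and limit the system to one CPU.
 *  - first enumerated APIC is not the boot CPU but the BSP bit is set:
 *    firmware enumeration bug, trust the BSP bit.
 *  - first enumerated APIC is not the boot CPU and the BSP bit is clear:
 *    crash kernel, reject the real BSP's APIC to avoid sending INIT to it.
 */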

static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
				    unsigned long *map)
{
	unsigned int id, end, cnt = 0;

	/* Calculate the exclusive end */
	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);

	/* Unfortunately there is no bitmap_weight_range() */
	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
		cnt++;
	return cnt;
}
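
/*
 * Editorial example (hypothetical shift value): with
 * x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN] == 6, a package level ID of
 * 0x40 covers APIC IDs 0x40-0x7f, so the loop above counts the bits of
 * @map which are set in that range.
 */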

static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	int cpu, dom;

	if (present) {
		set_bit(apic_id, phys_cpu_present_map);

		/*
		 * Double registration is valid in case of the boot CPU
		 * APIC because that is registered before the enumeration
		 * of the APICs via firmware parsers or VM guest
		 * mechanisms.
		 */
		if (apic_id == topo_info.boot_cpu_apic_id)
			cpu = 0;
		else
			cpu = topo_get_cpunr(apic_id);

		cpuid_to_apicid[cpu] = apic_id;
		topo_set_cpuids(cpu, apic_id, acpi_id);
	} else {
		topo_info.nr_disabled_cpus++;
	}

	/*
	 * Register present and possible CPUs in the domain
	 * maps. cpu_possible_map will be updated in
	 * topology_init_possible_cpus() after enumeration is done.
	 */
	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}

/**
 * topology_register_apic - Register an APIC in early topology maps
 * @apic_id: The APIC ID to set up
 * @acpi_id: The ACPI ID associated with the APIC
 * @present: True if the corresponding CPU is present
 */
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	if (apic_id >= MAX_LOCAL_APIC) {
		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
		topo_info.nr_rejected_cpus++;
		return;
	}

	if (check_for_real_bsp(apic_id)) {
		topo_info.nr_rejected_cpus++;
		return;
	}

	/* CPU numbers exhausted? */
	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
		topo_info.nr_rejected_cpus++;
		return;
	}

	topo_register_apic(apic_id, acpi_id, present);
}
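
/*
 * Editorial usage sketch (hypothetical, simplified): the firmware parsers
 * (ACPI MADT, MP-table) and VM guest enumeration code call this once per
 * local APIC entry during early boot, roughly as
 *
 *	topology_register_apic(apic_id, acpi_id, enabled);
 *
 * with @present reflecting whether the entry is marked enabled.
 */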

/**
 * topology_register_boot_apic - Register the boot CPU APIC
 * @apic_id: The APIC ID to set up
 *
 * Separate function so that CPU #0 can be assigned to the boot CPU.
 */
void __init topology_register_boot_apic(u32 apic_id)
{
	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);

	topo_info.boot_cpu_apic_id = apic_id;
	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}

/**
 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
 * @apicid: The APIC ID for which to lookup the logical ID
 * @at_level: The topology domain level to use
 *
 * @apicid must be a full APIC ID, not the normalized variant. It's valid for
 * all bits below the domain level specified by @at_level to be clear. So both
 * real APIC IDs and backshifted normalized APIC IDs work correctly.
 *
 * Returns:
 *  - >= 0: The requested logical ID
 *  - -ERANGE: @apicid is out of range
 *  - -ENODEV: @apicid is not registered
 */
int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return -ERANGE;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return -ENODEV;
	/* Get the number of set bits before @lvlid. */
	return bitmap_weight(apic_maps[at_level].map, lvlid);
}
EXPORT_SYMBOL_GPL(topology_get_logical_id);
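
/*
 * Editorial example (hypothetical topology where everything below the
 * package level occupies 6 bits of the APIC ID): if the package level map
 * has bits 0x00, 0x40 and 0x80 set, then
 * topology_get_logical_id(0x47, TOPO_PKG_DOMAIN) masks 0x47 down to the
 * package level ID 0x40 and returns 1, the number of registered package
 * IDs below it.
 */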

/**
 * topology_unit_count - Retrieve the count of specified units at a given topology domain level
 * @apicid: The APIC ID which specifies the search range
 * @which_units: The domain level specifying the units to count
 * @at_level: The domain level at which @which_units have to be counted
 *
 * This returns the number of possible units according to the enumerated
 * information.
 *
 * E.g. topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
 * counts the number of possible cores in the package to which @apicid
 * belongs.
 *
 * @at_level must obviously be greater than @which_units to produce useful
 * results. If @at_level is equal to @which_units the result is
 * unsurprisingly 1. If @at_level is less than @which_units the result is
 * by definition undefined and the function returns 0.
 */
unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
				 enum x86_topology_domains at_level)
{
	/* Remove the bits below @at_level to get the proper level ID of @apicid */
	unsigned int lvlid = topo_apicid(apicid, at_level);

	if (lvlid >= MAX_LOCAL_APIC)
		return 0;
	if (!test_bit(lvlid, apic_maps[at_level].map))
		return 0;
	if (which_units > at_level)
		return 0;
	if (which_units == at_level)
		return 1;
	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
}

#ifdef CONFIG_SMP
int topology_get_primary_thread(unsigned int cpu)
{
	u32 apic_id = cpuid_to_apicid[cpu];

	/*
	 * Get the core domain level APIC ID, which is the primary thread,
	 * and return the CPU number assigned to it.
	 */
	return topo_lookup_cpuid(topo_apicid(apic_id, TOPO_CORE_DOMAIN));
}
#endif
368
369
#ifdef CONFIG_ACPI_HOTPLUG_CPU
370
/**
371
* topology_hotplug_apic - Handle a physical hotplugged APIC after boot
372
* @apic_id: The APIC ID to set up
373
* @acpi_id: The ACPI ID associated to the APIC
374
*/
375
int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
376
{
377
int cpu;
378
379
if (apic_id >= MAX_LOCAL_APIC)
380
return -EINVAL;
381
382
/* Reject if the APIC ID was not registered during enumeration. */
383
if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
384
return -ENODEV;
385
386
cpu = topo_lookup_cpuid(apic_id);
387
if (cpu < 0)
388
return -ENOSPC;
389
390
set_bit(apic_id, phys_cpu_present_map);
391
topo_set_cpuids(cpu, apic_id, acpi_id);
392
cpu_mark_primary_thread(cpu, apic_id);
393
return cpu;
394
}
395
396
/**
397
* topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
398
* @cpu: The CPU number for which the APIC ID is removed
399
*/
400
void topology_hotunplug_apic(unsigned int cpu)
401
{
402
u32 apic_id = cpuid_to_apicid[cpu];
403
404
if (apic_id == BAD_APICID)
405
return;
406
407
per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
408
clear_bit(apic_id, phys_cpu_present_map);
409
set_cpu_present(cpu, false);
410
}
411
#endif
412
413
#ifdef CONFIG_X86_LOCAL_APIC
414
static unsigned int max_possible_cpus __initdata = NR_CPUS;
415
416
/**
417
* topology_apply_cmdline_limits_early - Apply topology command line limits early
418
*
419
* Ensure that command line limits are in effect before firmware parsing
420
* takes place.
421
*/
422
void __init topology_apply_cmdline_limits_early(void)
423
{
424
unsigned int possible = nr_cpu_ids;
425
426
/* 'maxcpus=0' 'nosmp' 'nolapic' */
427
if (!setup_max_cpus || apic_is_disabled)
428
possible = 1;
429
430
/* 'possible_cpus=N' */
431
possible = min_t(unsigned int, max_possible_cpus, possible);
432
433
if (possible < nr_cpu_ids) {
434
pr_info("Limiting to %u possible CPUs\n", possible);
435
set_nr_cpu_ids(possible);
436
}
437
}

static __init bool restrict_to_up(void)
{
	if (!smp_found_config)
		return true;
	/*
	 * XEN PV is special as it does not advertise the local APIC
	 * properly, but provides a fake topology for it so that the
	 * infrastructure works. So don't apply the APIC related
	 * restrictions here.
	 */
	if (xen_pv_domain())
		return false;

	return apic_is_disabled;
}

void __init topology_init_possible_cpus(void)
{
	unsigned int assigned = topo_info.nr_assigned_cpus;
	unsigned int disabled = topo_info.nr_disabled_cpus;
	unsigned int cnta, cntb, cpu, allowed = 1;
	unsigned int total = assigned + disabled;
	u32 apicid, firstid;

	/*
	 * If there was no APIC registered, then fake one so that the
	 * topology bitmap is populated. That ensures that the code below
	 * is valid and the various query interfaces can be used
	 * unconditionally. This does not affect the actual APIC code in
	 * any way because either the local APIC address has not been
	 * registered or the local APIC was disabled on the command line.
	 */
	if (topo_info.boot_cpu_apic_id == BAD_APICID)
		topology_register_boot_apic(0);

	if (!restrict_to_up()) {
		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
			disabled += assigned - nr_cpu_ids;
			assigned = nr_cpu_ids;
		}
		allowed = min_t(unsigned int, total, nr_cpu_ids);
	}

	if (total > allowed)
		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);

	assigned = min_t(unsigned int, allowed, assigned);
	disabled = allowed - assigned;

	topo_info.nr_assigned_cpus = assigned;
	topo_info.nr_disabled_cpus = disabled;

	total_cpus = allowed;
	set_nr_cpu_ids(allowed);

	cnta = domain_weight(TOPO_PKG_DOMAIN);
	cntb = domain_weight(TOPO_DIE_DOMAIN);
	__max_logical_packages = cnta;
	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));

	pr_info("Max. logical packages: %3u\n", cnta);
	pr_info("Max. logical dies: %3u\n", cntb);
	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);

	cnta = domain_weight(TOPO_CORE_DOMAIN);
	cntb = domain_weight(TOPO_SMT_DOMAIN);
	/*
	 * Can't use the order delta here as order(cnta) can be equal to
	 * order(cntb) even if cnta != cntb.
	 */
	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);

	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. cores per package: %3u\n", __num_cores_per_package);
	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);

	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
	if (topo_info.nr_rejected_cpus)
		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	/* Assign CPU numbers to non-present CPUs */
	for (apicid = 0; disabled; disabled--, apicid++) {
		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
					      MAX_LOCAL_APIC, apicid);
		if (apicid >= MAX_LOCAL_APIC)
			break;
		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
	}

	for (cpu = 0; cpu < allowed; cpu++) {
		apicid = cpuid_to_apicid[cpu];

		set_cpu_possible(cpu, true);

		if (apicid == BAD_APICID)
			continue;

		cpu_mark_primary_thread(cpu, apicid);
		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
	}
}

/*
 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
 */
void __init topology_reset_possible_cpus_up(void)
{
	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
	if (topo_info.boot_cpu_apic_id != BAD_APICID)
		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
}

static int __init setup_possible_cpus(char *str)
{
	get_option(&str, &max_possible_cpus);
	return 0;
}
early_param("possible_cpus", setup_possible_cpus);
#endif