Path: arch/x86/kernel/apic/x2apic_cluster.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/apic.h>

#include "local.h"

#define apic_cluster(apicid) ((apicid) >> 4)

/*
 * __x2apic_send_IPI_mask() possibly needs to read
 * x86_cpu_to_logical_apicid for all online cpus in a sequential way.
 * Using per cpu variable would cost one cache line per cpu.
 */
static u32 *x86_cpu_to_logical_apicid __read_mostly;

static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);

static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
	return x2apic_enabled();
}

static void x2apic_send_IPI(int cpu, int vector)
{
	u32 dest = x86_cpu_to_logical_apicid[cpu];

	/* x2apic MSRs are special and need a special fence: */
	weak_wrmsr_fence();
	__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
}

static void
__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
{
	unsigned int cpu, clustercpu;
	struct cpumask *tmpmsk;
	unsigned long flags;
	u32 dest;

	/* x2apic MSRs are special and need a special fence: */
	weak_wrmsr_fence();
	local_irq_save(flags);

	tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
	cpumask_copy(tmpmsk, mask);
	/* If IPI should not be sent to self, clear current CPU */
	if (apic_dest != APIC_DEST_ALLINC)
		__cpumask_clear_cpu(smp_processor_id(), tmpmsk);

	/* Collapse cpus in a cluster so a single IPI per cluster is sent */
	for_each_cpu(cpu, tmpmsk) {
		struct cpumask *cmsk = per_cpu(cluster_masks, cpu);

		dest = 0;
		for_each_cpu_and(clustercpu, tmpmsk, cmsk)
			dest |= x86_cpu_to_logical_apicid[clustercpu];

		if (!dest)
			continue;

		__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
		/* Remove cluster CPUs from tmpmask */
		cpumask_andnot(tmpmsk, tmpmsk, cmsk);
	}

	local_irq_restore(flags);
}
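
/*
 * Worked example (illustrative, assuming the logical ID encoding set up in
 * x2apic_prepare_cpu() below): four CPUs with physical APIC IDs 0x10-0x13
 * all fall into cluster 1 and hold the logical IDs 0x00010001, 0x00010002,
 * 0x00010004 and 0x00010008. The loop above ORs them into dest = 0x0001000f,
 * so a single ICR write reaches all four; CPUs in other clusters are handled
 * by their own iteration, one IPI per cluster.
 */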

static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
}

static void
x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}

static u32 x2apic_calc_apicid(unsigned int cpu)
{
	return x86_cpu_to_logical_apicid[cpu];
}

static void init_x2apic_ldr(void)
{
	struct cpumask *cmsk = this_cpu_read(cluster_masks);

	BUG_ON(!cmsk);

	cpumask_set_cpu(smp_processor_id(), cmsk);
}

/*
 * As an optimisation during boot, set the cluster_mask for all present
 * CPUs at once, to prevent each of them having to iterate over the others
 * to find the existing cluster_mask.
 */
static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster)
{
	int cpu_i;

	for_each_present_cpu(cpu_i) {
		struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i);
		u32 apicid = apic->cpu_present_to_apicid(cpu_i);

		if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster)
			continue;

		if (WARN_ON_ONCE(*cpu_cmsk == cmsk))
			continue;

		BUG_ON(*cpu_cmsk);
		*cpu_cmsk = cmsk;
	}
}

static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
{
	struct cpumask *cmsk = NULL;
	unsigned int cpu_i;

	/*
	 * At boot time, the CPU present mask is stable. The cluster mask is
	 * allocated for the first CPU in the cluster and propagated to all
	 * present siblings in the cluster. If the cluster mask is already set
	 * on entry to this function for a given CPU, there is nothing to do.
	 */
	if (per_cpu(cluster_masks, cpu))
		return 0;

	if (system_state < SYSTEM_RUNNING)
		goto alloc;

	/*
	 * On post boot hotplug for a CPU which was not present at boot time,
	 * iterate over all possible CPUs (even those which are not present
	 * any more) to find any existing cluster mask.
	 */
	for_each_possible_cpu(cpu_i) {
		u32 apicid = apic->cpu_present_to_apicid(cpu_i);

		if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
			cmsk = per_cpu(cluster_masks, cpu_i);
			/*
			 * If the cluster is already initialized, just store
			 * the mask and return. There's no need to propagate.
			 */
			if (cmsk) {
				per_cpu(cluster_masks, cpu) = cmsk;
				return 0;
			}
		}
	}
	/*
	 * No CPU in the cluster has ever been initialized, so fall through to
	 * the boot time code which will also populate the cluster mask for any
	 * other CPU in the cluster which is (now) present.
	 */
alloc:
	cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
	if (!cmsk)
		return -ENOMEM;
	per_cpu(cluster_masks, cpu) = cmsk;
	prefill_clustermask(cmsk, cpu, cluster);

	return 0;
}

static int x2apic_prepare_cpu(unsigned int cpu)
{
	u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
	u32 cluster = apic_cluster(phys_apicid);
	u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
	int node = cpu_to_node(cpu);

	x86_cpu_to_logical_apicid[cpu] = logical_apicid;

	if (alloc_clustermask(cpu, cluster, node) < 0)
		return -ENOMEM;

	if (!zalloc_cpumask_var_node(&per_cpu(ipi_mask, cpu), GFP_KERNEL, node))
		return -ENOMEM;

	return 0;
}

static int x2apic_dead_cpu(unsigned int dead_cpu)
{
	struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);

	if (cmsk)
		cpumask_clear_cpu(dead_cpu, cmsk);
	free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
	return 0;
}

static int x2apic_cluster_probe(void)
{
	u32 slots;

	if (!x2apic_mode)
		return 0;

	slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
	x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
	if (!x86_cpu_to_logical_apicid)
		return 0;

	if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
			      x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
		pr_err("Failed to register X2APIC_PREPARE\n");
		kfree(x86_cpu_to_logical_apicid);
		x86_cpu_to_logical_apicid = NULL;
		return 0;
	}
	init_x2apic_ldr();
	return 1;
}

static struct apic apic_x2apic_cluster __ro_after_init = {

	.name				= "cluster x2apic",
	.probe				= x2apic_cluster_probe,
	.acpi_madt_oem_check		= x2apic_acpi_madt_oem_check,

	.dest_mode_logical		= true,

	.disable_esr			= 0,

	.init_apic_ldr			= init_x2apic_ldr,
	.cpu_present_to_apicid		= default_cpu_present_to_apicid,

	.max_apic_id			= UINT_MAX,
	.x2apic_set_max_apicid		= true,
	.get_apic_id			= x2apic_get_apic_id,

	.calc_dest_apicid		= x2apic_calc_apicid,

	.send_IPI			= x2apic_send_IPI,
	.send_IPI_mask			= x2apic_send_IPI_mask,
	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,
	.send_IPI_all			= x2apic_send_IPI_all,
	.send_IPI_self			= x2apic_send_IPI_self,
	.nmi_to_offline_cpu		= true,

	.read				= native_apic_msr_read,
	.write				= native_apic_msr_write,
	.eoi				= native_apic_msr_eoi,
	.icr_read			= native_x2apic_icr_read,
	.icr_write			= native_x2apic_icr_write,
};

apic_driver(apic_x2apic_cluster);
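
For reference, a minimal standalone sketch of the destination-ID arithmetic used above. This is not kernel code: it re-implements the apic_cluster()/logical-ID encoding from x2apic_prepare_cpu() and the per-cluster OR-collapse from __x2apic_send_IPI_mask() as userspace C, with hypothetical physical APIC IDs chosen purely for illustration (the logical_apicid() helper name is invented here).

/* Standalone userspace sketch; compile with any C99 compiler. */
#include <stdint.h>
#include <stdio.h>

#define apic_cluster(apicid)	((apicid) >> 4)

/* Same encoding as x2apic_prepare_cpu(): cluster in bits 31:16, CPU bit in 15:0 */
static uint32_t logical_apicid(uint32_t phys)
{
	return (apic_cluster(phys) << 16) | (1u << (phys & 0xf));
}

int main(void)
{
	/* Hypothetical physical APIC IDs: three in cluster 2, one in cluster 5 */
	uint32_t phys[] = { 0x20, 0x21, 0x23, 0x51 };
	uint32_t dest = 0;

	for (int i = 0; i < 4; i++)
		printf("phys %#04x -> logical %#010x\n",
		       (unsigned)phys[i], (unsigned)logical_apicid(phys[i]));

	/* Collapse the cluster 2 CPUs the way __x2apic_send_IPI_mask() does */
	for (int i = 0; i < 3; i++)
		dest |= logical_apicid(phys[i]);

	/* One ICR write with this dest would reach all three cluster 2 CPUs */
	printf("cluster 2 collapsed dest: %#010x\n", (unsigned)dest);
	return 0;
}

Run, this prints logical IDs 0x00020001, 0x00020002, 0x00020008 and 0x00050002, and a collapsed cluster 2 destination of 0x0002000b, matching the one-IPI-per-cluster behaviour of the mask path; the CPU in cluster 5 would need its own ICR write.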