Path: arch/powerpc/platforms/pseries/hotplug-cpu.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 */

#define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include <asm/topology.h>
#include <asm/systemcfg.h>

#include "pseries.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

/*
 * Record the CPU ids used on each node.
 * Protected by cpu_add_remove_lock.
 */
static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];

static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}

static void pseries_cpu_offline_self(void)
{
	unsigned int hwcpu = hard_smp_processor_id();

	local_irq_disable();
	idle_task_exit();
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_teardown_cpu();

	unregister_slb_shadow(hwcpu);
	unregister_vpa(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for(;;);
}

static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
	systemcfg->processorCount--;
#endif

	/*fix boot_cpuid here*/
	if (cpu == boot_cpuid)
		boot_cpuid = cpumask_any(cpu_online_mask);

	/* FIXME: abstract this to not be platform specific later on */
	if (xive_enabled())
		xive_smp_disable_cpu();
	else
		xics_migrate_irqs_away();

	cleanup_cpu_mmu_context();

	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait for long enough to allow the cpu in question
 * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
 * notifications.
 *
 * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);
	unsigned long timeout = jiffies + msecs_to_jiffies(120000);

	while (true) {
		cpu_status = smp_query_cpu_stopped(pcpu);
		if (cpu_status == QCSS_STOPPED ||
		    cpu_status == QCSS_HARDWARE_ERROR)
			break;

		if (time_after(jiffies, timeout)) {
			pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
				cpu, pcpu);
			timeout = jiffies + msecs_to_jiffies(120000);
		}

		cond_resched();
	}

	if (cpu_status == QCSS_HARDWARE_ERROR) {
		pr_warn("CPU %i (hwid %i) reported error while dying\n",
			cpu, pcpu);
	}

	paca_ptrs[cpu]->cpu_start = 0;
}

/**
 * find_cpu_id_range - find a linear range of @nthreads free CPU ids.
 * @nthreads : the number of threads (cpu ids)
 * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
 *                  node can be picked.
 * @cpu_mask: the returned CPU mask.
 *
 * Returns 0 on success.
 */
static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
			     cpumask_var_t *cpu_mask)
{
	cpumask_var_t candidate_mask;
	unsigned int cpu, node;
	int rc = -ENOSPC;

	if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(*cpu_mask);
	for (cpu = 0; cpu < nthreads; cpu++)
		cpumask_set_cpu(cpu, *cpu_mask);

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);

	if (assigned_node != NUMA_NO_NODE) {
		/*
		 * Remove free ids previously assigned on the other nodes. We
		 * can walk only online nodes because once a node becomes
		 * online it is never turned offline again.
		 */
		for_each_online_node(node) {
			if (node == assigned_node)
				continue;
			cpumask_andnot(candidate_mask, candidate_mask,
				       node_recorded_ids_map[node]);
		}
	}

	if (cpumask_empty(candidate_mask))
		goto out;

	while (!cpumask_empty(*cpu_mask)) {
		if (cpumask_subset(*cpu_mask, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
	}

	if (!cpumask_empty(*cpu_mask))
		rc = 0;

out:
	free_cpumask_var(candidate_mask);
	return rc;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle
 * here is that a cpu device node may represent multiple logical cpus
 * in the SMT case. We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
static int pseries_add_processor(struct device_node *np)
{
	int len, nthreads, node, cpu, assigned_node;
	int rc = 0;
	cpumask_var_t cpu_mask;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	nthreads = len / sizeof(u32);

	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
	 * node id the added CPU belongs to.
	 */
	node = of_node_to_nid(np);
	if (node < 0 || !node_possible(node))
		node = first_online_node;

	BUG_ON(node == NUMA_NO_NODE);
	assigned_node = node;

	cpu_maps_update_begin();

	rc = find_cpu_id_range(nthreads, node, &cpu_mask);
	if (rc && nr_node_ids > 1) {
		/*
		 * Try again, considering the free CPU ids from the other node.
		 */
		node = NUMA_NO_NODE;
		rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
	}

	if (rc) {
		pr_err("Cannot add cpu %pOF; this system configuration"
		       " supports %d logical cpus.\n", np, num_possible_cpus());
		goto out;
	}

	for_each_cpu(cpu, cpu_mask) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}

	/* Record the newly used CPU ids for the associated node. */
	cpumask_or(node_recorded_ids_map[assigned_node],
		   node_recorded_ids_map[assigned_node], cpu_mask);

	/*
	 * If node is set to NUMA_NO_NODE, CPU ids have been reused from
	 * another node, remove them from its mask.
	 */
	if (node == NUMA_NO_NODE) {
		cpu = cpumask_first(cpu_mask);
		pr_warn("Reusing free CPU ids %d-%d from another node\n",
			cpu, cpu + nthreads - 1);
		for_each_online_node(node) {
			if (node == assigned_node)
				continue;
			cpumask_andnot(node_recorded_ids_map[node],
				       node_recorded_ids_map[node],
				       cpu_mask);
		}
	}

out:
	cpu_maps_update_done();
	free_cpumask_var(cpu_mask);
	return rc;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			update_numa_cpu_lookup_table(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (!cpu_online(cpu))
				break;

			/*
			 * device_offline() will return -EBUSY (via cpu_down()) if there
			 * is only one CPU left. Check it here to fail earlier and with a
			 * more informative error message, while also retaining the
			 * cpu_add_remove_lock to be sure that no CPUs are being
			 * online/offlined during this check.
			 */
			if (num_online_cpus() == 1) {
				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
				rc = -EBUSY;
				goto out_unlock;
			}

			cpu_maps_update_done();
			rc = device_offline(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();
			break;
		}
		if (cpu == num_possible_cpus()) {
			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
				thread);
		}
	}
out_unlock:
	cpu_maps_update_done();

out:
	return rc;
}

static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (!topology_is_primary_thread(cpu)) {
				if (cpu_smt_control != CPU_SMT_ENABLED)
					break;
				if (!topology_smt_thread_allowed(cpu))
					break;
			}

			cpu_maps_update_done();
			find_and_update_cpu_nid(cpu);
			rc = device_online(get_cpu_device(cpu));
			if (rc) {
				dlpar_offline_cpu(dn);
				goto out;
			}
			cpu_maps_update_begin();

			break;
		}
		if (cpu == num_possible_cpus())
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}

static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
{
	struct property *info;
	struct of_drc_info drc;
	const __be32 *value;
	u32 index;
	int count, i, j;

	info = of_find_property(parent, "ibm,drc-info", NULL);
	if (!info)
		return false;

	value = of_prop_next_u32(info, NULL, &count);

	/* First value of ibm,drc-info is number of drc-info records */
	if (value)
		value++;
	else
		return false;

	for (i = 0; i < count; i++) {
		if (of_read_drc_info_cell(&info, &value, &drc))
			return false;

		if (strncmp(drc.drc_type, "CPU", 3))
			break;

		if (drc_index > drc.last_drc_index)
			continue;

		index = drc.drc_index_start;
		for (j = 0; j < drc.num_sequential_elems; j++) {
			if (drc_index == index)
				return true;

			index += drc.sequential_inc;
		}
	}

	return false;
}

static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

	if (of_property_present(parent, "ibm,drc-info"))
		return drc_info_valid_index(parent, drc_index);

	/* Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);

		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}

static int pseries_cpuhp_attach_nodes(struct device_node *dn)
{
	struct of_changeset cs;
	int ret;

	/*
	 * This device node is unattached but may have siblings; open-code the
	 * traversal.
	 */
	for (of_changeset_init(&cs); dn != NULL; dn = dn->sibling) {
		ret = of_changeset_attach_node(&cs, dn);
		if (ret)
			goto out;
	}

	ret = of_changeset_apply(&cs);
out:
	of_changeset_destroy(&cs);
	return ret;
}

static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	rc = pseries_cpuhp_attach_nodes(dn);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
			dn, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	update_numa_distance(dn);

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
			dn, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
		 drc_index);
	return rc;
}

static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
{
	unsigned int use_count = 0;
	struct device_node *dn, *tn;

	WARN_ON(!of_node_is_type(cachedn, "cache"));

	for_each_of_cpu_node(dn) {
		tn = of_find_next_cache_node(dn);
		of_node_put(tn);
		if (tn == cachedn)
			use_count++;
	}

	for_each_node_by_type(dn, "cache") {
		tn = of_find_next_cache_node(dn);
		of_node_put(tn);
		if (tn == cachedn)
			use_count++;
	}

	return use_count;
}

static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
{
	struct device_node *dn;
	struct of_changeset cs;
	int ret = 0;

	of_changeset_init(&cs);
	ret = of_changeset_detach_node(&cs, cpudn);
	if (ret)
		goto out;

	dn = cpudn;
	while ((dn = of_find_next_cache_node(dn))) {
		if (pseries_cpuhp_cache_use_count(dn) > 1) {
			of_node_put(dn);
			break;
		}

		ret = of_changeset_detach_node(&cs, dn);
		of_node_put(dn);
		if (ret)
			goto out;
	}

	ret = of_changeset_apply(&cs);
out:
	of_changeset_destroy(&cs);
	return ret;
}

static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
		 dn, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
			drc_index, dn, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = pseries_cpuhp_detach_nodes(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 drc_index;
	int rc;

	drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index);

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
			rc = dlpar_cpu_remove_by_index(drc_index);
			/*
			 * Setting the isolation state of an UNISOLATED/CONFIGURED
			 * device to UNISOLATE is a no-op, but the hypervisor can
			 * use it as a hint that the CPU removal failed.
			 */
			if (rc)
				dlpar_unisolate_drc(drc_index);
		}
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

void __init pseries_cpu_hotplug_init(void)
{
	int qcss_tok;

	rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
	qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return;
	}

	smp_ops->cpu_offline_self = pseries_cpu_offline_self;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;
}

static int __init pseries_dlpar_init(void)
{
	unsigned int node;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		for_each_node(node) {
			if (!alloc_cpumask_var_node(&node_recorded_ids_map[node],
						    GFP_KERNEL, node))
				return -ENOMEM;

			/* Record ids of CPU added at boot time */
			cpumask_copy(node_recorded_ids_map[node],
				     cpumask_of_node(node));
		}

		of_reconfig_notifier_register(&pseries_smp_nb);
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_dlpar_init);