// SPDX-License-Identifier: GPL-2.0-or-later1/*2*3* Copyright (c) 2005 Linas Vepstas <[email protected]>4*/56#include <linux/delay.h>7#include <linux/list.h>8#include <linux/sched.h>9#include <linux/semaphore.h>10#include <linux/pci.h>11#include <linux/slab.h>12#include <linux/kthread.h>13#include <asm/eeh_event.h>14#include <asm/ppc-pci.h>1516/** Overview:17* EEH error states may be detected within exception handlers;18* however, the recovery processing needs to occur asynchronously19* in a normal kernel context and not an interrupt context.20* This pair of routines creates an event and queues it onto a21* work-queue, where a worker thread can drive recovery.22*/2324static DEFINE_SPINLOCK(eeh_eventlist_lock);25static DECLARE_COMPLETION(eeh_eventlist_event);26static LIST_HEAD(eeh_eventlist);2728/**29* eeh_event_handler - Dispatch EEH events.30* @dummy - unused31*32* The detection of a frozen slot can occur inside an interrupt,33* where it can be hard to do anything about it. The goal of this34* routine is to pull these detection events out of the context35* of the interrupt handler, and re-dispatch them for processing36* at a later time in a normal context.37*/38static int eeh_event_handler(void * dummy)39{40unsigned long flags;41struct eeh_event *event;4243while (!kthread_should_stop()) {44if (wait_for_completion_interruptible(&eeh_eventlist_event))45break;4647/* Fetch EEH event from the queue */48spin_lock_irqsave(&eeh_eventlist_lock, flags);49event = NULL;50if (!list_empty(&eeh_eventlist)) {51event = list_entry(eeh_eventlist.next,52struct eeh_event, list);53list_del(&event->list);54}55spin_unlock_irqrestore(&eeh_eventlist_lock, flags);56if (!event)57continue;5859/* We might have event without binding PE */60if (event->pe)61eeh_handle_normal_event(event->pe);62else63eeh_handle_special_event();6465kfree(event);66}6768return 0;69}7071/**72* eeh_event_init - Start kernel thread to handle EEH events73*74* This routine is called to start the kernel thread for processing75* EEH event.76*/77int eeh_event_init(void)78{79struct task_struct *t;80int ret = 0;8182t = kthread_run(eeh_event_handler, NULL, "eehd");83if (IS_ERR(t)) {84ret = PTR_ERR(t);85pr_err("%s: Failed to start EEH daemon (%d)\n",86__func__, ret);87return ret;88}8990return 0;91}9293/**94* eeh_send_failure_event - Generate a PCI error event95* @pe: EEH PE96*97* This routine can be called within an interrupt context;98* the actual event will be delivered in a normal context99* (from a workqueue).100*/101int __eeh_send_failure_event(struct eeh_pe *pe)102{103unsigned long flags;104struct eeh_event *event;105106event = kzalloc(sizeof(*event), GFP_ATOMIC);107if (!event) {108pr_err("EEH: out of memory, event not handled\n");109return -ENOMEM;110}111event->pe = pe;112113/*114* Mark the PE as recovering before inserting it in the queue.115* This prevents the PE from being free()ed by a hotplug driver116* while the PE is sitting in the event queue.117*/118if (pe) {119#ifdef CONFIG_STACKTRACE120/*121* Save the current stack trace so we can dump it from the122* event handler thread.123*/124pe->trace_entries = stack_trace_save(pe->stack_trace,125ARRAY_SIZE(pe->stack_trace), 0);126#endif /* CONFIG_STACKTRACE */127128eeh_pe_state_mark(pe, EEH_PE_RECOVERING);129}130131/* We may or may not be called in an interrupt context */132spin_lock_irqsave(&eeh_eventlist_lock, flags);133list_add(&event->list, &eeh_eventlist);134spin_unlock_irqrestore(&eeh_eventlist_lock, flags);135136/* For EEH deamon to knick in */137complete(&eeh_eventlist_event);138139return 0;140}141142int eeh_send_failure_event(struct eeh_pe *pe)143{144/*145* If we've manually suppressed recovery events via debugfs146* then just drop it on the floor.147*/148if (eeh_debugfs_no_recover) {149pr_err("EEH: Event dropped due to no_recover setting\n");150return 0;151}152153return __eeh_send_failure_event(pe);154}155156/**157* eeh_remove_event - Remove EEH event from the queue158* @pe: Event binding to the PE159* @force: Event will be removed unconditionally160*161* On PowerNV platform, we might have subsequent coming events162* is part of the former one. For that case, those subsequent163* coming events are totally duplicated and unnecessary, thus164* they should be removed.165*/166void eeh_remove_event(struct eeh_pe *pe, bool force)167{168unsigned long flags;169struct eeh_event *event, *tmp;170171/*172* If we have NULL PE passed in, we have dead IOC173* or we're sure we can report all existing errors174* by the caller.175*176* With "force", the event with associated PE that177* have been isolated, the event won't be removed178* to avoid event lost.179*/180spin_lock_irqsave(&eeh_eventlist_lock, flags);181list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {182if (!force && event->pe &&183(event->pe->state & EEH_PE_ISOLATED))184continue;185186if (!pe) {187list_del(&event->list);188kfree(event);189} else if (pe->type & EEH_PE_PHB) {190if (event->pe && event->pe->phb == pe->phb) {191list_del(&event->list);192kfree(event);193}194} else if (event->pe == pe) {195list_del(&event->list);196kfree(event);197}198}199spin_unlock_irqrestore(&eeh_eventlist_lock, flags);200}201202203