/* drivers/infiniband/hw/ehca/ehca_irq.c */
/*1* IBM eServer eHCA Infiniband device driver for Linux on POWER2*3* Functions for EQs, NEQs and interrupts4*5* Authors: Heiko J Schick <[email protected]>6* Khadija Souissi <[email protected]>7* Hoang-Nam Nguyen <[email protected]>8* Joachim Fenkes <[email protected]>9*10* Copyright (c) 2005 IBM Corporation11*12* All rights reserved.13*14* This source code is distributed under a dual license of GPL v2.0 and OpenIB15* BSD.16*17* OpenIB BSD License18*19* Redistribution and use in source and binary forms, with or without20* modification, are permitted provided that the following conditions are met:21*22* Redistributions of source code must retain the above copyright notice, this23* list of conditions and the following disclaimer.24*25* Redistributions in binary form must reproduce the above copyright notice,26* this list of conditions and the following disclaimer in the documentation27* and/or other materials28* provided with the distribution.29*30* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"31* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE32* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE33* ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE34* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR35* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF36* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR37* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER38* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)39* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE40* POSSIBILITY OF SUCH DAMAGE.41*/4243#include <linux/slab.h>4445#include "ehca_classes.h"46#include "ehca_irq.h"47#include "ehca_iverbs.h"48#include "ehca_tools.h"49#include "hcp_if.h"50#include "hipz_fns.h"51#include "ipz_pt_fn.h"5253#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)54#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31)55#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7)56#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31)57#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31)58#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63)59#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63)6061#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1)62#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7)63#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)64#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)65#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)66#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23)6768#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)69#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)7071static void queue_comp_task(struct ehca_cq *__cq);7273static struct ehca_comp_pool *pool;7475static inline void comp_event_callback(struct ehca_cq *cq)76{77if (!cq->ib_cq.comp_handler)78return;7980spin_lock(&cq->cb_lock);81cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);82spin_unlock(&cq->cb_lock);8384return;85}8687static void print_error_data(struct ehca_shca *shca, void *data,88u64 *rblock, int length)89{90u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);91u64 resource = rblock[1];9293switch (type) 
{94case 0x1: /* Queue Pair */95{96struct ehca_qp *qp = (struct ehca_qp *)data;9798/* only print error data if AER is set */99if (rblock[6] == 0)100return;101102ehca_err(&shca->ib_device,103"QP 0x%x (resource=%llx) has errors.",104qp->ib_qp.qp_num, resource);105break;106}107case 0x4: /* Completion Queue */108{109struct ehca_cq *cq = (struct ehca_cq *)data;110111ehca_err(&shca->ib_device,112"CQ 0x%x (resource=%llx) has errors.",113cq->cq_number, resource);114break;115}116default:117ehca_err(&shca->ib_device,118"Unknown error type: %llx on %s.",119type, shca->ib_device.name);120break;121}122123ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);124ehca_err(&shca->ib_device, "EHCA ----- error data begin "125"---------------------------------------------------");126ehca_dmp(rblock, length, "resource=%llx", resource);127ehca_err(&shca->ib_device, "EHCA ----- error data end "128"----------------------------------------------------");129130return;131}132133int ehca_error_data(struct ehca_shca *shca, void *data,134u64 resource)135{136137unsigned long ret;138u64 *rblock;139unsigned long block_count;140141rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);142if (!rblock) {143ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");144ret = -ENOMEM;145goto error_data1;146}147148/* rblock must be 4K aligned and should be 4K large */149ret = hipz_h_error_data(shca->ipz_hca_handle,150resource,151rblock,152&block_count);153154if (ret == H_R_STATE)155ehca_err(&shca->ib_device,156"No error data is available: %llx.", resource);157else if (ret == H_SUCCESS) {158int length;159160length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);161162if (length > EHCA_PAGESIZE)163length = EHCA_PAGESIZE;164165print_error_data(shca, data, rblock, length);166} else167ehca_err(&shca->ib_device,168"Error data could not be fetched: %llx", resource);169170ehca_free_fw_ctrlblock(rblock);171172error_data1:173return ret;174175}176177static void dispatch_qp_event(struct ehca_shca *shca, 
struct ehca_qp *qp,178enum ib_event_type event_type)179{180struct ib_event event;181182/* PATH_MIG without the QP ever having been armed is false alarm */183if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)184return;185186event.device = &shca->ib_device;187event.event = event_type;188189if (qp->ext_type == EQPT_SRQ) {190if (!qp->ib_srq.event_handler)191return;192193event.element.srq = &qp->ib_srq;194qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context);195} else {196if (!qp->ib_qp.event_handler)197return;198199event.element.qp = &qp->ib_qp;200qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);201}202}203204static void qp_event_callback(struct ehca_shca *shca, u64 eqe,205enum ib_event_type event_type, int fatal)206{207struct ehca_qp *qp;208u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);209210read_lock(&ehca_qp_idr_lock);211qp = idr_find(&ehca_qp_idr, token);212if (qp)213atomic_inc(&qp->nr_events);214read_unlock(&ehca_qp_idr_lock);215216if (!qp)217return;218219if (fatal)220ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);221222dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ?223IB_EVENT_SRQ_ERR : event_type);224225/*226* eHCA only processes one WQE at a time for SRQ base QPs,227* so the last WQE has been processed as soon as the QP enters228* error state.229*/230if (fatal && qp->ext_type == EQPT_SRQBASE)231dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);232233if (atomic_dec_and_test(&qp->nr_events))234wake_up(&qp->wait_completion);235return;236}237238static void cq_event_callback(struct ehca_shca *shca,239u64 eqe)240{241struct ehca_cq *cq;242u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);243244read_lock(&ehca_cq_idr_lock);245cq = idr_find(&ehca_cq_idr, token);246if (cq)247atomic_inc(&cq->nr_events);248read_unlock(&ehca_cq_idr_lock);249250if (!cq)251return;252253ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);254255if (atomic_dec_and_test(&cq->nr_events))256wake_up(&cq->wait_completion);257258return;259}260261static void 
parse_identifier(struct ehca_shca *shca, u64 eqe)262{263u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);264265switch (identifier) {266case 0x02: /* path migrated */267qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0);268break;269case 0x03: /* communication established */270qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0);271break;272case 0x04: /* send queue drained */273qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0);274break;275case 0x05: /* QP error */276case 0x06: /* QP error */277qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1);278break;279case 0x07: /* CQ error */280case 0x08: /* CQ error */281cq_event_callback(shca, eqe);282break;283case 0x09: /* MRMWPTE error */284ehca_err(&shca->ib_device, "MRMWPTE error.");285break;286case 0x0A: /* port event */287ehca_err(&shca->ib_device, "Port event.");288break;289case 0x0B: /* MR access error */290ehca_err(&shca->ib_device, "MR access error.");291break;292case 0x0C: /* EQ error */293ehca_err(&shca->ib_device, "EQ error.");294break;295case 0x0D: /* P/Q_Key mismatch */296ehca_err(&shca->ib_device, "P/Q_Key mismatch.");297break;298case 0x10: /* sampling complete */299ehca_err(&shca->ib_device, "Sampling complete.");300break;301case 0x11: /* unaffiliated access error */302ehca_err(&shca->ib_device, "Unaffiliated access error.");303break;304case 0x12: /* path migrating */305ehca_err(&shca->ib_device, "Path migrating.");306break;307case 0x13: /* interface trace stopped */308ehca_err(&shca->ib_device, "Interface trace stopped.");309break;310case 0x14: /* first error capture info available */311ehca_info(&shca->ib_device, "First error capture available");312break;313case 0x15: /* SRQ limit reached */314qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0);315break;316default:317ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",318identifier, shca->ib_device.name);319break;320}321322return;323}324325static void dispatch_port_event(struct ehca_shca *shca, int port_num,326enum ib_event_type type, 
const char *msg)327{328struct ib_event event;329330ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);331event.device = &shca->ib_device;332event.event = type;333event.element.port_num = port_num;334ib_dispatch_event(&event);335}336337static void notify_port_conf_change(struct ehca_shca *shca, int port_num)338{339struct ehca_sma_attr new_attr;340struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;341342ehca_query_sma_attr(shca, port_num, &new_attr);343344if (new_attr.sm_sl != old_attr->sm_sl ||345new_attr.sm_lid != old_attr->sm_lid)346dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,347"SM changed");348349if (new_attr.lid != old_attr->lid ||350new_attr.lmc != old_attr->lmc)351dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,352"LID changed");353354if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||355memcmp(new_attr.pkeys, old_attr->pkeys,356sizeof(u16) * new_attr.pkey_tbl_len))357dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,358"P_Key changed");359360*old_attr = new_attr;361}362363/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */364static int replay_modify_qp(struct ehca_sport *sport)365{366int aqp1_destroyed;367unsigned long flags;368369spin_lock_irqsave(&sport->mod_sqp_lock, flags);370371aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];372373if (sport->ibqp_sqp[IB_QPT_SMI])374ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);375if (!aqp1_destroyed)376ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);377378spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);379380return aqp1_destroyed;381}382383static void parse_ec(struct ehca_shca *shca, u64 eqe)384{385u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);386u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);387u8 spec_event;388struct ehca_sport *sport = &shca->sport[port - 1];389390switch (ec) {391case 0x30: /* port availability change */392if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {393/* only replay modify_qp calls in autodetect mode;394* if 
AQP1 was destroyed, the port is already down395* again and we can drop the event.396*/397if (ehca_nr_ports < 0)398if (replay_modify_qp(sport))399break;400401sport->port_state = IB_PORT_ACTIVE;402dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,403"is active");404ehca_query_sma_attr(shca, port, &sport->saved_attr);405} else {406sport->port_state = IB_PORT_DOWN;407dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,408"is inactive");409}410break;411case 0x31:412/* port configuration change413* disruptive change is caused by414* LID, PKEY or SM change415*/416if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {417ehca_warn(&shca->ib_device, "disruptive port "418"%d configuration change", port);419420sport->port_state = IB_PORT_DOWN;421dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,422"is inactive");423424sport->port_state = IB_PORT_ACTIVE;425dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,426"is active");427ehca_query_sma_attr(shca, port,428&sport->saved_attr);429} else430notify_port_conf_change(shca, port);431break;432case 0x32: /* adapter malfunction */433ehca_err(&shca->ib_device, "Adapter malfunction.");434break;435case 0x33: /* trace stopped */436ehca_err(&shca->ib_device, "Traced stopped.");437break;438case 0x34: /* util async event */439spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);440if (spec_event == 0x80) /* client reregister required */441dispatch_port_event(shca, port,442IB_EVENT_CLIENT_REREGISTER,443"client reregister req.");444else445ehca_warn(&shca->ib_device, "Unknown util async "446"event %x on port %x", spec_event, port);447break;448default:449ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",450ec, shca->ib_device.name);451break;452}453454return;455}456457static inline void reset_eq_pending(struct ehca_cq *cq)458{459u64 CQx_EP;460struct h_galpa gal = cq->galpas.kernel;461462hipz_galpa_store_cq(gal, cqx_ep, 0x0);463CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));464465return;466}467468irqreturn_t ehca_interrupt_neq(int irq, void 
*dev_id)469{470struct ehca_shca *shca = (struct ehca_shca*)dev_id;471472tasklet_hi_schedule(&shca->neq.interrupt_task);473474return IRQ_HANDLED;475}476477void ehca_tasklet_neq(unsigned long data)478{479struct ehca_shca *shca = (struct ehca_shca*)data;480struct ehca_eqe *eqe;481u64 ret;482483eqe = ehca_poll_eq(shca, &shca->neq);484485while (eqe) {486if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))487parse_ec(shca, eqe->entry);488489eqe = ehca_poll_eq(shca, &shca->neq);490}491492ret = hipz_h_reset_event(shca->ipz_hca_handle,493shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);494495if (ret != H_SUCCESS)496ehca_err(&shca->ib_device, "Can't clear notification events.");497498return;499}500501irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)502{503struct ehca_shca *shca = (struct ehca_shca*)dev_id;504505tasklet_hi_schedule(&shca->eq.interrupt_task);506507return IRQ_HANDLED;508}509510511static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)512{513u64 eqe_value;514u32 token;515struct ehca_cq *cq;516517eqe_value = eqe->entry;518ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);519if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {520ehca_dbg(&shca->ib_device, "Got completion event");521token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);522read_lock(&ehca_cq_idr_lock);523cq = idr_find(&ehca_cq_idr, token);524if (cq)525atomic_inc(&cq->nr_events);526read_unlock(&ehca_cq_idr_lock);527if (cq == NULL) {528ehca_err(&shca->ib_device,529"Invalid eqe for non-existing cq token=%x",530token);531return;532}533reset_eq_pending(cq);534if (ehca_scaling_code)535queue_comp_task(cq);536else {537comp_event_callback(cq);538if (atomic_dec_and_test(&cq->nr_events))539wake_up(&cq->wait_completion);540}541} else {542ehca_dbg(&shca->ib_device, "Got non completion event");543parse_identifier(shca, eqe_value);544}545}546547void ehca_process_eq(struct ehca_shca *shca, int is_irq)548{549struct ehca_eq *eq = &shca->eq;550struct ehca_eqe_cache_entry *eqe_cache = 
eq->eqe_cache;551u64 eqe_value, ret;552int eqe_cnt, i;553int eq_empty = 0;554555spin_lock(&eq->irq_spinlock);556if (is_irq) {557const int max_query_cnt = 100;558int query_cnt = 0;559int int_state = 1;560do {561int_state = hipz_h_query_int_state(562shca->ipz_hca_handle, eq->ist);563query_cnt++;564iosync();565} while (int_state && query_cnt < max_query_cnt);566if (unlikely((query_cnt == max_query_cnt)))567ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",568int_state, query_cnt);569}570571/* read out all eqes */572eqe_cnt = 0;573do {574u32 token;575eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);576if (!eqe_cache[eqe_cnt].eqe)577break;578eqe_value = eqe_cache[eqe_cnt].eqe->entry;579if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {580token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);581read_lock(&ehca_cq_idr_lock);582eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);583if (eqe_cache[eqe_cnt].cq)584atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);585read_unlock(&ehca_cq_idr_lock);586if (!eqe_cache[eqe_cnt].cq) {587ehca_err(&shca->ib_device,588"Invalid eqe for non-existing cq "589"token=%x", token);590continue;591}592} else593eqe_cache[eqe_cnt].cq = NULL;594eqe_cnt++;595} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);596if (!eqe_cnt) {597if (is_irq)598ehca_dbg(&shca->ib_device,599"No eqe found for irq event");600goto unlock_irq_spinlock;601} else if (!is_irq) {602ret = hipz_h_eoi(eq->ist);603if (ret != H_SUCCESS)604ehca_err(&shca->ib_device,605"bad return code EOI -rc = %lld\n", ret);606ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);607}608if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))609ehca_dbg(&shca->ib_device, "too many eqes for one irq event");610/* enable irq for new packets */611for (i = 0; i < eqe_cnt; i++) {612if (eq->eqe_cache[i].cq)613reset_eq_pending(eq->eqe_cache[i].cq);614}615/* check eq */616spin_lock(&eq->spinlock);617eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue));618spin_unlock(&eq->spinlock);619/* call completion handler for 
cached eqes */620for (i = 0; i < eqe_cnt; i++)621if (eq->eqe_cache[i].cq) {622if (ehca_scaling_code)623queue_comp_task(eq->eqe_cache[i].cq);624else {625struct ehca_cq *cq = eq->eqe_cache[i].cq;626comp_event_callback(cq);627if (atomic_dec_and_test(&cq->nr_events))628wake_up(&cq->wait_completion);629}630} else {631ehca_dbg(&shca->ib_device, "Got non completion event");632parse_identifier(shca, eq->eqe_cache[i].eqe->entry);633}634/* poll eq if not empty */635if (eq_empty)636goto unlock_irq_spinlock;637do {638struct ehca_eqe *eqe;639eqe = ehca_poll_eq(shca, &shca->eq);640if (!eqe)641break;642process_eqe(shca, eqe);643} while (1);644645unlock_irq_spinlock:646spin_unlock(&eq->irq_spinlock);647}648649void ehca_tasklet_eq(unsigned long data)650{651ehca_process_eq((struct ehca_shca*)data, 1);652}653654static inline int find_next_online_cpu(struct ehca_comp_pool *pool)655{656int cpu;657unsigned long flags;658659WARN_ON_ONCE(!in_interrupt());660if (ehca_debug_level >= 3)661ehca_dmp(cpu_online_mask, cpumask_size(), "");662663spin_lock_irqsave(&pool->last_cpu_lock, flags);664cpu = cpumask_next(pool->last_cpu, cpu_online_mask);665if (cpu >= nr_cpu_ids)666cpu = cpumask_first(cpu_online_mask);667pool->last_cpu = cpu;668spin_unlock_irqrestore(&pool->last_cpu_lock, flags);669670return cpu;671}672673static void __queue_comp_task(struct ehca_cq *__cq,674struct ehca_cpu_comp_task *cct)675{676unsigned long flags;677678spin_lock_irqsave(&cct->task_lock, flags);679spin_lock(&__cq->task_lock);680681if (__cq->nr_callbacks == 0) {682__cq->nr_callbacks++;683list_add_tail(&__cq->entry, &cct->cq_list);684cct->cq_jobs++;685wake_up(&cct->wait_queue);686} else687__cq->nr_callbacks++;688689spin_unlock(&__cq->task_lock);690spin_unlock_irqrestore(&cct->task_lock, flags);691}692693static void queue_comp_task(struct ehca_cq *__cq)694{695int cpu_id;696struct ehca_cpu_comp_task *cct;697int cq_jobs;698unsigned long flags;699700cpu_id = find_next_online_cpu(pool);701BUG_ON(!cpu_online(cpu_id));702703cct = 
per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);704BUG_ON(!cct);705706spin_lock_irqsave(&cct->task_lock, flags);707cq_jobs = cct->cq_jobs;708spin_unlock_irqrestore(&cct->task_lock, flags);709if (cq_jobs > 0) {710cpu_id = find_next_online_cpu(pool);711cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);712BUG_ON(!cct);713}714715__queue_comp_task(__cq, cct);716}717718static void run_comp_task(struct ehca_cpu_comp_task *cct)719{720struct ehca_cq *cq;721unsigned long flags;722723spin_lock_irqsave(&cct->task_lock, flags);724725while (!list_empty(&cct->cq_list)) {726cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);727spin_unlock_irqrestore(&cct->task_lock, flags);728729comp_event_callback(cq);730if (atomic_dec_and_test(&cq->nr_events))731wake_up(&cq->wait_completion);732733spin_lock_irqsave(&cct->task_lock, flags);734spin_lock(&cq->task_lock);735cq->nr_callbacks--;736if (!cq->nr_callbacks) {737list_del_init(cct->cq_list.next);738cct->cq_jobs--;739}740spin_unlock(&cq->task_lock);741}742743spin_unlock_irqrestore(&cct->task_lock, flags);744}745746static int comp_task(void *__cct)747{748struct ehca_cpu_comp_task *cct = __cct;749int cql_empty;750DECLARE_WAITQUEUE(wait, current);751752set_current_state(TASK_INTERRUPTIBLE);753while (!kthread_should_stop()) {754add_wait_queue(&cct->wait_queue, &wait);755756spin_lock_irq(&cct->task_lock);757cql_empty = list_empty(&cct->cq_list);758spin_unlock_irq(&cct->task_lock);759if (cql_empty)760schedule();761else762__set_current_state(TASK_RUNNING);763764remove_wait_queue(&cct->wait_queue, &wait);765766spin_lock_irq(&cct->task_lock);767cql_empty = list_empty(&cct->cq_list);768spin_unlock_irq(&cct->task_lock);769if (!cql_empty)770run_comp_task(__cct);771772set_current_state(TASK_INTERRUPTIBLE);773}774__set_current_state(TASK_RUNNING);775776return 0;777}778779static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,780int cpu)781{782struct ehca_cpu_comp_task *cct;783784cct = per_cpu_ptr(pool->cpu_comp_tasks, 
cpu);785spin_lock_init(&cct->task_lock);786INIT_LIST_HEAD(&cct->cq_list);787init_waitqueue_head(&cct->wait_queue);788cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);789790return cct->task;791}792793static void destroy_comp_task(struct ehca_comp_pool *pool,794int cpu)795{796struct ehca_cpu_comp_task *cct;797struct task_struct *task;798unsigned long flags_cct;799800cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);801802spin_lock_irqsave(&cct->task_lock, flags_cct);803804task = cct->task;805cct->task = NULL;806cct->cq_jobs = 0;807808spin_unlock_irqrestore(&cct->task_lock, flags_cct);809810if (task)811kthread_stop(task);812}813814static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)815{816struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);817LIST_HEAD(list);818struct ehca_cq *cq;819unsigned long flags_cct;820821spin_lock_irqsave(&cct->task_lock, flags_cct);822823list_splice_init(&cct->cq_list, &list);824825while (!list_empty(&list)) {826cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);827828list_del(&cq->entry);829__queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));830}831832spin_unlock_irqrestore(&cct->task_lock, flags_cct);833834}835836static int __cpuinit comp_pool_callback(struct notifier_block *nfb,837unsigned long action,838void *hcpu)839{840unsigned int cpu = (unsigned long)hcpu;841struct ehca_cpu_comp_task *cct;842843switch (action) {844case CPU_UP_PREPARE:845case CPU_UP_PREPARE_FROZEN:846ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);847if (!create_comp_task(pool, cpu)) {848ehca_gen_err("Can't create comp_task for cpu: %x", cpu);849return notifier_from_errno(-ENOMEM);850}851break;852case CPU_UP_CANCELED:853case CPU_UP_CANCELED_FROZEN:854ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);855cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);856kthread_bind(cct->task, cpumask_any(cpu_online_mask));857destroy_comp_task(pool, cpu);858break;859case CPU_ONLINE:860case CPU_ONLINE_FROZEN:861ehca_gen_dbg("CPU: %x 
(CPU_ONLINE)", cpu);862cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);863kthread_bind(cct->task, cpu);864wake_up_process(cct->task);865break;866case CPU_DOWN_PREPARE:867case CPU_DOWN_PREPARE_FROZEN:868ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);869break;870case CPU_DOWN_FAILED:871case CPU_DOWN_FAILED_FROZEN:872ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);873break;874case CPU_DEAD:875case CPU_DEAD_FROZEN:876ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);877destroy_comp_task(pool, cpu);878take_over_work(pool, cpu);879break;880}881882return NOTIFY_OK;883}884885static struct notifier_block comp_pool_callback_nb __cpuinitdata = {886.notifier_call = comp_pool_callback,887.priority = 0,888};889890int ehca_create_comp_pool(void)891{892int cpu;893struct task_struct *task;894895if (!ehca_scaling_code)896return 0;897898pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);899if (pool == NULL)900return -ENOMEM;901902spin_lock_init(&pool->last_cpu_lock);903pool->last_cpu = cpumask_any(cpu_online_mask);904905pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);906if (pool->cpu_comp_tasks == NULL) {907kfree(pool);908return -EINVAL;909}910911for_each_online_cpu(cpu) {912task = create_comp_task(pool, cpu);913if (task) {914kthread_bind(task, cpu);915wake_up_process(task);916}917}918919register_hotcpu_notifier(&comp_pool_callback_nb);920921printk(KERN_INFO "eHCA scaling code enabled\n");922923return 0;924}925926void ehca_destroy_comp_pool(void)927{928int i;929930if (!ehca_scaling_code)931return;932933unregister_hotcpu_notifier(&comp_pool_callback_nb);934935for_each_online_cpu(i)936destroy_comp_task(pool, i);937938free_percpu(pool->cpu_comp_tasks);939kfree(pool);940}941942943