Path: drivers/misc/sgi-gru/grukservices.c
/*
 * SN Platform GRU Driver
 *
 *              KERNEL SERVICES THAT USE THE GRU
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>

/*
 * Kernel GRU Usage
 *
 * The following is an interim algorithm for management of kernel GRU
 * resources. This will likely be replaced when we better understand the
 * kernel/user requirements.
 *
 * Blade percpu resources reserved for kernel use. These resources are
 * reserved whenever the kernel context for the blade is loaded. Note
 * that the kernel context is not guaranteed to be always available. It is
 * loaded on demand & can be stolen by a user if the user demand exceeds the
 * kernel demand. The kernel can always reload the kernel context but
 * a SLEEP may be required!!!.
 *
 * Async Overview:
 *
 *	Each blade has one "kernel context" that owns GRU kernel resources
 *	located on the blade. Kernel drivers use GRU resources in this context
 *	for sending messages, zeroing memory, etc.
 *
 *	The kernel context is dynamically loaded on demand. If it is not in
 *	use by the kernel, the kernel context can be unloaded & given to a user.
 *	The kernel context will be reloaded when needed. This may require that
 *	a context be stolen from a user.
 *		NOTE: frequent unloading/reloading of the kernel context is
 *		expensive. We are depending on batch schedulers, cpusets, sane
 *		drivers or some other mechanism to prevent the need for frequent
 *		stealing/reloading.
 *
 *	The kernel context consists of two parts:
 *		- 1 CB & a few DSRs that are reserved for each cpu on the blade.
 *		  Each cpu has its own private resources & does not share them
 *		  with other cpus. These resources are used serially, ie,
 *		  locked, used & unlocked on each call to a function in
 *		  grukservices.
 *			(Now that we have dynamic loading of kernel contexts, I
 *			may rethink this & allow sharing between cpus....)
 *
 *		- Additional resources can be reserved long term & used directly
 *		  by UV drivers located in the kernel. Drivers using these GRU
 *		  resources can use asynchronous GRU instructions that send
 *		  interrupts on completion.
 *			- these resources must be explicitly locked/unlocked
 *			- locked resources prevent (obviously) the kernel
 *			  context from being unloaded.
 *			- drivers using these resources directly issue their own
 *			  GRU instruction and must wait/check completion.
 *
 *		  When these resources are reserved, the caller can optionally
 *		  associate a wait_queue with the resources and use asynchronous
 *		  GRU instructions. When an async GRU instruction completes, the
 *		  driver will do a wakeup on the event.
 */
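
/*
 * Illustrative sketch (not built): one way a kernel driver might use the
 * long-term async resources described above. The flow mirrors quicktest2()
 * at the bottom of this file: reserve CBRs for a blade, lock them, issue a
 * GRU instruction with IMA_INTERRUPT, wait for the completion interrupt,
 * then unlock and release. The function name and buffer are hypothetical.
 */
#if 0
static int example_async_zero(int blade_id, unsigned long *buf)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	void *cb;

	han = gru_reserve_async_resources(blade_id, 1, 0, &cmp);
	if (!han)
		return -EBUSY;		/* async resources already reserved */

	gru_lock_async_resource(han, &cb, NULL);
	gru_vset(cb, uv_gpa(buf), 0, XTYPE_DW, 4, 1, IMA_INTERRUPT);
	gru_wait_async_cbr(han);	/* woken by the completion interrupt */

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
	return 0;
}
#endif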

#define ASYNC_HAN_TO_BID(h)	((h) - 1)
#define ASYNC_BID_TO_HAN(b)	((b) + 1)
#define ASYNC_HAN_TO_BS(h)	gru_base[ASYNC_HAN_TO_BID(h)]

#define GRU_NUM_KERNEL_CBR	1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
					GRU_CACHE_LINE_BYTES)

/* GRU instruction attributes for all instructions */
#define IMA			IMA_CB_DELAY

/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__                               \
	__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))

#define MAGIC	0x1234567887654321UL

/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT	3

/* Status of message queue sections */
#define MQS_EMPTY		0
#define MQS_FULL		1
#define MQS_NOOP		2

/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/* optimized for x86_64 */
struct message_queue {
	union gru_mesqhead	head __gru_cacheline_aligned__;	/* CL 0 */
	int			qlines;				/* DW 1 */
	long			hstatus[2];
	void			*next __gru_cacheline_aligned__;/* CL 1 */
	void			*limit;
	void			*start;
	void			*start2;
	char			data ____cacheline_aligned;	/* CL 2 */
};

/* First word in every message - used by mesq interface */
struct message_header {
	char	present;
	char	present2;
	char	lines;
	char	fill;
};

#define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))

/*
 * Reload the blade's kernel context into a GRU chiplet. Called holding
 * the bs_kgts_sema for READ. Will steal user contexts if necessary.
 */
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
	struct gru_state *gru;
	struct gru_thread_state *kgts;
	void *vaddr;
	int ctxnum, ncpus;

	up_read(&bs->bs_kgts_sema);
	down_write(&bs->bs_kgts_sema);

	if (!bs->bs_kgts) {
		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
		bs->bs_kgts->ts_user_blade_id = blade_id;
	}
	kgts = bs->bs_kgts;

	if (!kgts->ts_gru) {
		STAT(load_kernel_context);
		ncpus = uv_blade_nr_possible_cpus(blade_id);
		kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
			GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
		kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
			GRU_NUM_KERNEL_DSR_BYTES * ncpus +
				bs->bs_async_dsr_bytes);
		while (!gru_assign_gru_context(kgts)) {
			msleep(1);
			gru_steal_context(kgts);
		}
		gru_load_context(kgts);
		gru = bs->bs_kgts->ts_gru;
		vaddr = gru->gs_gru_base_vaddr;
		ctxnum = kgts->ts_ctxnum;
		bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
		bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
	}
	downgrade_write(&bs->bs_kgts_sema);
}

/*
 * Free all kernel contexts that are not currently in use.
 *   Returns 0 if all freed, else the number of contexts still in use.
 */
static int gru_free_kernel_contexts(void)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int bid, ret = 0;

	for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
		bs = gru_base[bid];
		if (!bs)
			continue;

		/* Ignore busy contexts. Don't want to block here. */
		if (down_write_trylock(&bs->bs_kgts_sema)) {
			kgts = bs->bs_kgts;
			if (kgts && kgts->ts_gru)
				gru_unload_context(kgts, 0);
			bs->bs_kgts = NULL;
			up_write(&bs->bs_kgts_sema);
			kfree(kgts);
		} else {
			ret++;
		}
	}
	return ret;
}

/*
 * Lock & load the kernel context for the specified blade.
 */
static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
{
	struct gru_blade_state *bs;
	int bid;

	STAT(lock_kernel_context);
again:
	bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
	bs = gru_base[bid];

	/* Handle the case where migration occurred while waiting for the sema */
	down_read(&bs->bs_kgts_sema);
	if (blade_id < 0 && bid != uv_numa_blade_id()) {
		up_read(&bs->bs_kgts_sema);
		goto again;
	}
	if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
		gru_load_kernel_context(bs, bid);
	return bs;

}
Context is not245* unloaded but may be stolen before next use.246*/247static void gru_unlock_kernel_context(int blade_id)248{249struct gru_blade_state *bs;250251bs = gru_base[blade_id];252up_read(&bs->bs_kgts_sema);253STAT(unlock_kernel_context);254}255256/*257* Reserve & get pointers to the DSR/CBRs reserved for the current cpu.258* - returns with preemption disabled259*/260static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)261{262struct gru_blade_state *bs;263int lcpu;264265BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);266preempt_disable();267bs = gru_lock_kernel_context(-1);268lcpu = uv_blade_processor_id();269*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;270*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;271return 0;272}273274/*275* Free the current cpus reserved DSR/CBR resources.276*/277static void gru_free_cpu_resources(void *cb, void *dsr)278{279gru_unlock_kernel_context(uv_numa_blade_id());280preempt_enable();281}282283/*284* Reserve GRU resources to be used asynchronously.285* Note: currently supports only 1 reservation per blade.286*287* input:288* blade_id - blade on which resources should be reserved289* cbrs - number of CBRs290* dsr_bytes - number of DSR bytes needed291* output:292* handle to identify resource293* (0 = async resources already reserved)294*/295unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,296struct completion *cmp)297{298struct gru_blade_state *bs;299struct gru_thread_state *kgts;300int ret = 0;301302bs = gru_base[blade_id];303304down_write(&bs->bs_kgts_sema);305306/* Verify no resources already reserved */307if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)308goto done;309bs->bs_async_dsr_bytes = dsr_bytes;310bs->bs_async_cbrs = cbrs;311bs->bs_async_wq = cmp;312kgts = bs->bs_kgts;313314/* Resources changed. 

/*
 * Reserve GRU resources to be used asynchronously.
 *   Note: currently supports only 1 reservation per blade.
 *
 *	input:
 *		blade_id  - blade on which resources should be reserved
 *		cbrs	  - number of CBRs
 *		dsr_bytes - number of DSR bytes needed
 *	output:
 *		handle to identify resource
 *		(0 = async resources already reserved)
 */
unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
			struct completion *cmp)
{
	struct gru_blade_state *bs;
	struct gru_thread_state *kgts;
	int ret = 0;

	bs = gru_base[blade_id];

	down_write(&bs->bs_kgts_sema);

	/* Verify no resources already reserved */
	if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
		goto done;
	bs->bs_async_dsr_bytes = dsr_bytes;
	bs->bs_async_cbrs = cbrs;
	bs->bs_async_wq = cmp;
	kgts = bs->bs_kgts;

	/* Resources changed. Unload context if already loaded */
	if (kgts && kgts->ts_gru)
		gru_unload_context(kgts, 0);
	ret = ASYNC_BID_TO_HAN(blade_id);

done:
	up_write(&bs->bs_kgts_sema);
	return ret;
}

/*
 * Release async resources previously reserved.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_release_async_resources(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	down_write(&bs->bs_kgts_sema);
	bs->bs_async_dsr_bytes = 0;
	bs->bs_async_cbrs = 0;
	bs->bs_async_wq = NULL;
	up_write(&bs->bs_kgts_sema);
}

/*
 * Wait for async GRU instructions to complete.
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_wait_async_cbr(unsigned long han)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);

	wait_for_completion(bs->bs_async_wq);
	mb();
}

/*
 * Lock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 *	output:
 *		cb  - pointer to first CBR
 *		dsr - pointer to first DSR
 */
void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
{
	struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
	int blade_id = ASYNC_HAN_TO_BID(han);
	int ncpus;

	gru_lock_kernel_context(blade_id);
	ncpus = uv_blade_nr_possible_cpus(blade_id);
	if (cb)
		*cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
	if (dsr)
		*dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
}

/*
 * Unlock previously reserved async GRU resources
 *
 *	input:
 *		han - handle to identify resources
 */
void gru_unlock_async_resource(unsigned long han)
{
	int blade_id = ASYNC_HAN_TO_BID(han);

	gru_unlock_kernel_context(blade_id);
}

/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
		struct control_block_extended_exc_detail *excdet)
{
	struct gru_control_block_extended *cbe;
	struct gru_thread_state *kgts = NULL;
	unsigned long off;
	int cbrnum, bid;

	/*
	 * Locate kgts for cb. This algorithm is SLOW but
	 * this function is rarely called (ie., almost never).
	 * Performance does not matter.
	 */
	for_each_possible_blade(bid) {
		if (!gru_base[bid])
			break;
		kgts = gru_base[bid]->bs_kgts;
		if (!kgts || !kgts->ts_gru)
			continue;
		off = cb - kgts->ts_gru->gs_gru_base_vaddr;
		if (off < GRU_SIZE)
			break;
		kgts = NULL;
	}
	BUG_ON(!kgts);
	cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
	cbe = get_cbe(GRUBASE(cb), cbrnum);
	gru_flush_cache(cbe);	/* CBE not coherent */
	sync_core();
	excdet->opc = cbe->opccpy;
	excdet->exopc = cbe->exopccpy;
	excdet->ecause = cbe->ecause;
	excdet->exceptdet0 = cbe->idef1upd;
	excdet->exceptdet1 = cbe->idef3upd;
	gru_flush_cache(cbe);
	return 0;
}

char *gru_get_cb_exception_detail_str(int ret, void *cb,
				      char *buf, int size)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;

	if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
		gru_get_cb_exception_detail(cb, &excdet);
		snprintf(buf, size,
			"GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
			"excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
			gen, excdet.opc, excdet.exopc, excdet.ecause,
			excdet.exceptdet0, excdet.exceptdet1);
	} else {
		snprintf(buf, size, "No exception");
	}
	return buf;
}

static int gru_wait_idle_or_exception(struct gru_control_block_status *gen)
{
	while (gen->istatus >= CBS_ACTIVE) {
		cpu_relax();
		barrier();
	}
	return gen->istatus;
}

static int gru_retry_exception(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	struct control_block_extended_exc_detail excdet;
	int retry = EXCEPTION_RETRY_LIMIT;

	while (1) {
		if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
			return CBS_IDLE;
		if (gru_get_cb_message_queue_substatus(cb))
			return CBS_EXCEPTION;
		gru_get_cb_exception_detail(cb, &excdet);
		if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
				(excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
			break;
		if (retry-- == 0)
			break;
		gen->icmd = 1;
		gru_flush_cache(gen);
	}
	return CBS_EXCEPTION;
}

int gru_check_status_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gen->istatus;
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;

}

int gru_wait_proc(void *cb)
{
	struct gru_control_block_status *gen = (void *)cb;
	int ret;

	ret = gru_wait_idle_or_exception(gen);
	if (ret == CBS_EXCEPTION)
		ret = gru_retry_exception(cb);
	rmb();
	return ret;
}

void gru_abort(int ret, void *cb, char *str)
{
	char buf[GRU_EXC_STR_SIZE];

	panic("GRU FATAL ERROR: %s - %s\n", str,
	      gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf)));
}

void gru_wait_abort_proc(void *cb)
{
	int ret;

	ret = gru_wait_proc(cb);
	if (ret)
		gru_abort(ret, cb, "gru_wait_abort");
}


/*------------------------------ MESSAGE QUEUES -----------------------------*/
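
/*
 * Added note on the queue layout set up by gru_create_message_queue() below:
 * struct message_queue occupies the first two cache lines of the queue; the
 * message area that follows is treated as two halves (start/start2). The
 * hstatus[] words are per-half status values - the receiver updates them in
 * gru_free_message() as it drains a half, and the sender tests/updates them
 * through HSTATUS() AMOs when switching the hardware head to the other half
 * (see send_message_queue_full()). Each message is 1 or 2 cache lines and
 * carries a "present" flag in the first word of each line.
 */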

/* Internal status. These are NOT returned to the user. */
#define MQIE_AGAIN		-1	/* try again */


/*
 * Save/restore the "present" flag that is in the second line of 2-line
 * messages
 */
static inline int get_present2(void *p)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	return mhdr->present;
}

static inline void restore_present2(void *p, int val)
{
	struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES;
	mhdr->present = val;
}

/*
 * Create a message queue.
 *	qlines - message queue size in cache lines. Includes 2-line header.
 */
int gru_create_message_queue(struct gru_message_queue_desc *mqd,
		void *p, unsigned int bytes, int nasid, int vector, int apicid)
{
	struct message_queue *mq = p;
	unsigned int qlines;

	qlines = bytes / GRU_CACHE_LINE_BYTES - 2;
	memset(mq, 0, bytes);
	mq->start = &mq->data;
	mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES;
	mq->next = &mq->data;
	mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES;
	mq->qlines = qlines;
	mq->hstatus[0] = 0;
	mq->hstatus[1] = 1;
	mq->head = gru_mesq_head(2, qlines / 2 + 1);
	mqd->mq = mq;
	mqd->mq_gpa = uv_gpa(mq);
	mqd->qlines = qlines;
	mqd->interrupt_pnode = nasid >> 1;
	mqd->interrupt_vector = vector;
	mqd->interrupt_apicid = apicid;
	return 0;
}
EXPORT_SYMBOL_GPL(gru_create_message_queue);

/*
 * Send a NOOP message to a message queue
 *	Returns:
 *		 0 - if queue is full after the send. This is the normal case
 *		     but various races can change this.
 *		-1 - if mesq sent successfully but queue not full
 *		>0 - unexpected error. MQE_xxx returned
 */
static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg)
{
	const struct message_header noop_header = {
					.present = MQS_NOOP, .lines = 1};
	unsigned long m;
	int substatus, ret;
	struct message_header save_mhdr, *mhdr = mesg;

	STAT(mesq_noop);
	save_mhdr = *mhdr;
	*mhdr = noop_header;
	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
	ret = gru_wait(cb);

	if (ret) {
		substatus = gru_get_cb_message_queue_substatus(cb);
		switch (substatus) {
		case CBSS_NO_ERROR:
			STAT(mesq_noop_unexpected_error);
			ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_LB_OVERFLOWED:
			STAT(mesq_noop_lb_overflow);
			ret = MQE_CONGESTION;
			break;
		case CBSS_QLIMIT_REACHED:
			STAT(mesq_noop_qlimit_reached);
			ret = 0;
			break;
		case CBSS_AMO_NACKED:
			STAT(mesq_noop_amo_nacked);
			ret = MQE_CONGESTION;
			break;
		case CBSS_PUT_NACKED:
			STAT(mesq_noop_put_nacked);
			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
						IMA);
			if (gru_wait(cb) == CBS_IDLE)
				ret = MQIE_AGAIN;
			else
				ret = MQE_UNEXPECTED_CB_ERR;
			break;
		case CBSS_PAGE_OVERFLOW:
			STAT(mesq_noop_page_overflow);
			/* fallthru */
		default:
			BUG();
		}
	}
	*mhdr = save_mhdr;
	return ret;
}

/*
 * Handle a gru_mesq full.
 */
static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	union gru_mesqhead mqh;
	unsigned int limit, head;
	unsigned long avalue;
	int half, qlines;

	/* Determine if switching to first/second half of q */
	avalue = gru_get_amo_value(cb);
	head = gru_get_amo_value_head(cb);
	limit = gru_get_amo_value_limit(cb);

	qlines = mqd->qlines;
	half = (limit != qlines);

	if (half)
		mqh = gru_mesq_head(qlines / 2 + 1, qlines);
	else
		mqh = gru_mesq_head(2, qlines / 2 + 1);

	/* Try to get lock for switching head pointer */
	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;
	if (!gru_get_amo_value(cb)) {
		STAT(mesq_qf_locked);
		return MQE_QUEUE_FULL;
	}

	/* Got the lock. Send optional NOP if queue not full, */
	if (head != limit) {
		if (send_noop_message(cb, mqd, mesg)) {
			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
					XTYPE_DW, IMA);
			if (gru_wait(cb) != CBS_IDLE)
				goto cberr;
			STAT(mesq_qf_noop_not_full);
			return MQIE_AGAIN;
		}
		avalue++;
	}

	/* Then flip queuehead to other half of queue. */
	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
							IMA);
	if (gru_wait(cb) != CBS_IDLE)
		goto cberr;

	/* If swapping the queue head was not successful, clear the hstatus lock */
	if (gru_get_amo_value(cb) != avalue) {
		STAT(mesq_qf_switch_head_failed);
		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
							IMA);
		if (gru_wait(cb) != CBS_IDLE)
			goto cberr;
	}
	return MQIE_AGAIN;
cberr:
	STAT(mesq_qf_unexpected_error);
	return MQE_UNEXPECTED_CB_ERR;
}

/*
 * Handle a PUT failure. Note: if message was a 2-line message, one of the
 * lines might have been written successfully. Before sending the
 * message, "present" must be cleared in BOTH lines to prevent the receiver
 * from prematurely seeing the full message.
 */
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
			void *mesg, int lines)
{
	unsigned long m, *val = mesg, gpa, save;
	int ret;

	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
	if (lines == 2) {
		gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
		if (gru_wait(cb) != CBS_IDLE)
			return MQE_UNEXPECTED_CB_ERR;
	}
	gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;

	if (!mqd->interrupt_vector)
		return MQE_OK;

	/*
	 * Send a cross-partition interrupt to the SSI that contains the target
	 * message queue. Normally, the interrupt is automatically delivered by
	 * hardware but some error conditions require explicit delivery.
	 * Use the GRU to deliver the interrupt. Otherwise partition failures
	 * could cause unrecovered errors.
	 */
	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
	save = *val;
	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
				dest_Fixed);
	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
	ret = gru_wait(cb);
	*val = save;
	if (ret != CBS_IDLE)
		return MQE_UNEXPECTED_CB_ERR;
	return MQE_OK;
}

/*
 * Handle a gru_mesq failure. Some of these failures are software recoverable
 * or retryable.
 */
static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
				void *mesg, int lines)
{
	int substatus, ret = 0;

	substatus = gru_get_cb_message_queue_substatus(cb);
	switch (substatus) {
	case CBSS_NO_ERROR:
		STAT(mesq_send_unexpected_error);
		ret = MQE_UNEXPECTED_CB_ERR;
		break;
	case CBSS_LB_OVERFLOWED:
		STAT(mesq_send_lb_overflow);
		ret = MQE_CONGESTION;
		break;
	case CBSS_QLIMIT_REACHED:
		STAT(mesq_send_qlimit_reached);
		ret = send_message_queue_full(cb, mqd, mesg, lines);
		break;
	case CBSS_AMO_NACKED:
		STAT(mesq_send_amo_nacked);
		ret = MQE_CONGESTION;
		break;
	case CBSS_PUT_NACKED:
		STAT(mesq_send_put_nacked);
		ret = send_message_put_nacked(cb, mqd, mesg, lines);
		break;
	case CBSS_PAGE_OVERFLOW:
		STAT(mesq_page_overflow);
		/* fallthru */
	default:
		BUG();
	}
	return ret;
}

/*
 * Send a message to a message queue
 *	mqd	message queue descriptor
 *	mesg	message. Must be vaddr within a GSEG
 *	bytes	message size (<= 2 CL)
 */
int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
				unsigned int bytes)
{
	struct message_header *mhdr;
	void *cb;
	void *dsr;
	int istatus, clines, ret;

	STAT(mesq_send);
	BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES);

	clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES);
	if (gru_get_cpu_resources(bytes, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	memcpy(dsr, mesg, bytes);
	mhdr = dsr;
	mhdr->present = MQS_FULL;
	mhdr->lines = clines;
	if (clines == 2) {
		mhdr->present2 = get_present2(mhdr);
		restore_present2(mhdr, MQS_FULL);
	}

	do {
		ret = MQE_OK;
		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
		istatus = gru_wait(cb);
		if (istatus != CBS_IDLE)
			ret = send_message_failure(cb, mqd, dsr, clines);
	} while (ret == MQIE_AGAIN);
	gru_free_cpu_resources(cb, dsr);

	if (ret)
		STAT(mesq_send_failed);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_send_message_gpa);

/*
 * Advance the receive pointer for the queue to the next message.
 */
void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	void *next, *pnext;
	int half = -1;
	int lines = mhdr->lines;

	if (lines == 2)
		restore_present2(mhdr, MQS_EMPTY);
	mhdr->present = MQS_EMPTY;

	pnext = mq->next;
	next = pnext + GRU_CACHE_LINE_BYTES * lines;
	if (next == mq->limit) {
		next = mq->start;
		half = 1;
	} else if (pnext < mq->start2 && next >= mq->start2) {
		half = 0;
	}

	if (half >= 0)
		mq->hstatus[half] = 1;
	mq->next = next;
}
EXPORT_SYMBOL_GPL(gru_free_message);

/*
 * Get next message from message queue. Return NULL if no message
 * present. User must call gru_free_message() to move to next message.
 *	rmq	message queue
 */
void *gru_get_next_message(struct gru_message_queue_desc *mqd)
{
	struct message_queue *mq = mqd->mq;
	struct message_header *mhdr = mq->next;
	int present = mhdr->present;

	/* skip NOOP messages */
	while (present == MQS_NOOP) {
		gru_free_message(mqd, mhdr);
		mhdr = mq->next;
		present = mhdr->present;
	}

	/* Wait for both halves of 2 line messages */
	if (present == MQS_FULL && mhdr->lines == 2 &&
				get_present2(mhdr) == MQS_EMPTY)
		present = MQS_EMPTY;

	if (!present) {
		STAT(mesq_receive_none);
		return NULL;
	}

	if (mhdr->lines == 2)
		restore_present2(mhdr, mhdr->present2);

	STAT(mesq_receive);
	return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
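
/*
 * Illustrative sketch (not built): end-to-end use of the message queue API
 * above, modeled on quicktest1() below. The queue storage, its size and the
 * interrupt parameters (nasid/vector/apicid of 0) are hypothetical.
 */
#if 0
static int example_mesq(void)
{
	struct gru_message_queue_desc mqd;
	char mes[GRU_CACHE_LINE_BYTES], *m;
	void *p, *mq;
	int ret;

	/* quicktest1() notes the queue needs 1K alignment, within one page */
	p = kmalloc(4096, GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	mq = (void *)(((unsigned long)p + 1023) & ~1023UL);
	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);

	/* Send one single-line message, retrying on transient congestion */
	memset(mes, 0xee, sizeof(mes));
	do {
		ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
	} while (ret == MQE_CONGESTION);

	/* Receive it back and advance the queue */
	m = gru_get_next_message(&mqd);		/* NULL if nothing present */
	if (m)
		gru_free_message(&mqd, m);	/* move to next message */

	kfree(p);
	return ret;
}
#endif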

/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/

/*
 * Load a DW from a global GPA. The GPA can be a memory or MMR address.
 */
int gru_read_gpa(unsigned long *value, unsigned long gpa)
{
	void *cb;
	void *dsr;
	int ret, iaa;

	STAT(read_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	iaa = gpa >> 62;
	gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
	ret = gru_wait(cb);
	if (ret == CBS_IDLE)
		*value = *(unsigned long *)dsr;
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_read_gpa);


/*
 * Copy a block of data using the GRU resources
 */
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
				unsigned int bytes)
{
	void *cb;
	void *dsr;
	int ret;

	STAT(copy_gpa);
	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
	ret = gru_wait(cb);
	gru_free_cpu_resources(cb, dsr);
	return ret;
}
EXPORT_SYMBOL_GPL(gru_copy_gpa);

/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */

static int quicktest0(unsigned long arg)
{
	unsigned long word0;
	unsigned long word1;
	void *cb;
	void *dsr;
	unsigned long *p;
	int ret = -EIO;

	if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
		return MQE_BUG_NO_RESOURCES;
	p = dsr;
	word0 = MAGIC;
	word1 = 0;

	gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
		goto done;
	}

	if (*p != MAGIC) {
		printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
		goto done;
	}
	gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
	if (gru_wait(cb) != CBS_IDLE) {
		printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
		goto done;
	}

	if (word0 != word1 || word1 != MAGIC) {
		printk(KERN_DEBUG
		       "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
		       smp_processor_id(), word1, MAGIC);
		goto done;
	}
	ret = 0;

done:
	gru_free_cpu_resources(cb, dsr);
	return ret;
}

#define ALIGNUP(p, q)	((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))

static int quicktest1(unsigned long arg)
{
	struct gru_message_queue_desc mqd;
	void *p, *mq;
	unsigned long *dw;
	int i, ret = -EIO;
	char mes[GRU_CACHE_LINE_BYTES], *m;

	/* Need 1K cacheline aligned that does not cross page boundary */
	p = kmalloc(4096, 0);
	if (p == NULL)
		return -ENOMEM;
	mq = ALIGNUP(p, 1024);
	memset(mes, 0xee, sizeof(mes));
	dw = mq;

	gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
	for (i = 0; i < 6; i++) {
		mes[8] = i;
		do {
			ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
		} while (ret == MQE_CONGESTION);
		if (ret)
			break;
	}
	if (ret != MQE_QUEUE_FULL || i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest1: unexpected status %d, i %d\n",
		       smp_processor_id(), ret, i);
		goto done;
	}

	for (i = 0; i < 6; i++) {
		m = gru_get_next_message(&mqd);
		if (!m || m[8] != i)
			break;
		gru_free_message(&mqd, m);
	}
	if (i != 4) {
		printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
			smp_processor_id(), i, m, m ? m[8] : -1);
		goto done;
	}
	ret = 0;

done:
	kfree(p);
	return ret;
}

static int quicktest2(unsigned long arg)
{
	static DECLARE_COMPLETION(cmp);
	unsigned long han;
	int blade_id = 0;
	int numcb = 4;
	int ret = 0;
	unsigned long *buf;
	void *cb0, *cb;
	struct gru_control_block_status *gen;
	int i, k, istatus, bytes;

	bytes = numcb * 4 * 8;
	buf = kmalloc(bytes, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	ret = -EBUSY;
	han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
	if (!han)
		goto done;

	gru_lock_async_resource(han, &cb0, NULL);
	memset(buf, 0xee, bytes);
	for (i = 0; i < numcb; i++)
		gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
				XTYPE_DW, 4, 1, IMA_INTERRUPT);

	ret = 0;
	k = numcb;
	do {
		gru_wait_async_cbr(han);
		for (i = 0; i < numcb; i++) {
			cb = cb0 + i * GRU_HANDLE_STRIDE;
			istatus = gru_check_status(cb);
			if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
				break;
		}
		if (i == numcb)
			continue;
		if (istatus != CBS_IDLE) {
			printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
			ret = -EFAULT;
		} else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
				buf[4 * i + 3]) {
			printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
			       smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
			ret = -EIO;
		}
		k--;
		gen = cb;
		gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
	} while (k);
	BUG_ON(cmp.done);

	gru_unlock_async_resource(han);
	gru_release_async_resources(han);
done:
	kfree(buf);
	return ret;
}

#define BUFSIZE 200
static int quicktest3(unsigned long arg)
{
	char buf1[BUFSIZE], buf2[BUFSIZE];
	int ret = 0;

	memset(buf2, 0, sizeof(buf2));
	memset(buf1, get_cycles() & 255, sizeof(buf1));
	gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
	if (memcmp(buf1, buf2, BUFSIZE)) {
		printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
		ret = -EIO;
	}
	return ret;
}

/*
 * Debugging only. User hook for various kernel tests
 * of driver & gru.
 */
int gru_ktest(unsigned long arg)
{
	int ret = -EINVAL;

	switch (arg & 0xff) {
	case 0:
		ret = quicktest0(arg);
		break;
	case 1:
		ret = quicktest1(arg);
		break;
	case 2:
		ret = quicktest2(arg);
		break;
	case 3:
		ret = quicktest3(arg);
		break;
	case 99:
		ret = gru_free_kernel_contexts();
		break;
	}
	return ret;

}

int gru_kservices_init(void)
{
	return 0;
}

void gru_kservices_exit(void)
{
	if (gru_free_kernel_contexts())
		BUG();
}