Path: blob/master/arch/powerpc/platforms/book3s/vas-api.c
26530 views
// SPDX-License-Identifier: GPL-2.0-or-later1/*2* VAS user space API for its accelerators (Only NX-GZIP is supported now)3* Copyright (C) 2019 Haren Myneni, IBM Corp4*/56#define pr_fmt(fmt) "vas-api: " fmt78#include <linux/kernel.h>9#include <linux/device.h>10#include <linux/cdev.h>11#include <linux/fs.h>12#include <linux/slab.h>13#include <linux/uaccess.h>14#include <linux/kthread.h>15#include <linux/sched/signal.h>16#include <linux/mmu_context.h>17#include <linux/io.h>18#include <asm/vas.h>19#include <uapi/asm/vas-api.h>2021/*22* The driver creates the device node that can be used as follows:23* For NX-GZIP24*25* fd = open("/dev/crypto/nx-gzip", O_RDWR);26* rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);27* paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).28* vas_copy(&crb, 0, 1);29* vas_paste(paste_addr, 0, 1);30* close(fd) or exit process to close window.31*32* where "vas_copy" and "vas_paste" are defined in copy-paste.h.33* copy/paste returns to the user space directly. So refer NX hardware34* documentation for exact copy/paste usage and completion / error35* conditions.36*/3738/*39* Wrapper object for the nx-gzip device - there is just one instance of40* this node for the whole system.41*/42static struct coproc_dev {43struct cdev cdev;44struct device *device;45char *name;46dev_t devt;47struct class *class;48enum vas_cop_type cop_type;49const struct vas_user_win_ops *vops;50} coproc_device;5152struct coproc_instance {53struct coproc_dev *coproc;54struct vas_window *txwin;55};5657static char *coproc_devnode(const struct device *dev, umode_t *mode)58{59return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));60}6162/*63* Take reference to pid and mm64*/65int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)66{67/*68* Window opened by a child thread may not be closed when69* it exits. So take reference to its pid and release it70* when the window is free by parent thread.71* Acquire a reference to the task's pid to make sure72* pid will not be re-used - needed only for multithread73* applications.74*/75task_ref->pid = get_task_pid(current, PIDTYPE_PID);76/*77* Acquire a reference to the task's mm.78*/79task_ref->mm = get_task_mm(current);80if (!task_ref->mm) {81put_pid(task_ref->pid);82pr_err("pid(%d): mm_struct is not found\n",83current->pid);84return -EPERM;85}8687mmgrab(task_ref->mm);88mmput(task_ref->mm);89/*90* Process closes window during exit. In the case of91* multithread application, the child thread can open92* window and can exit without closing it. So takes tgid93* reference until window closed to make sure tgid is not94* reused.95*/96task_ref->tgid = find_get_pid(task_tgid_vnr(current));9798return 0;99}100101/*102* Successful return must release the task reference with103* put_task_struct104*/105static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,106struct task_struct **tskp, struct pid **pidp)107{108struct task_struct *tsk;109struct pid *pid;110111pid = task_ref->pid;112tsk = get_pid_task(pid, PIDTYPE_PID);113if (!tsk) {114pid = task_ref->tgid;115tsk = get_pid_task(pid, PIDTYPE_PID);116/*117* Parent thread (tgid) will be closing window when it118* exits. So should not get here.119*/120if (WARN_ON_ONCE(!tsk))121return false;122}123124/* Return if the task is exiting. */125if (tsk->flags & PF_EXITING) {126put_task_struct(tsk);127return false;128}129130*tskp = tsk;131*pidp = pid;132133return true;134}135136/*137* Update the CSB to indicate a translation error.138*139* User space will be polling on CSB after the request is issued.140* If NX can handle the request without any issues, it updates CSB.141* Whereas if NX encounters page fault, the kernel will handle the142* fault and update CSB with translation error.143*144* If we are unable to update the CSB means copy_to_user failed due to145* invalid csb_addr, send a signal to the process.146*/147void vas_update_csb(struct coprocessor_request_block *crb,148struct vas_user_win_ref *task_ref)149{150struct coprocessor_status_block csb;151struct kernel_siginfo info;152struct task_struct *tsk;153void __user *csb_addr;154struct pid *pid;155int rc;156157/*158* NX user space windows can not be opened for task->mm=NULL159* and faults will not be generated for kernel requests.160*/161if (WARN_ON_ONCE(!task_ref->mm))162return;163164csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);165166memset(&csb, 0, sizeof(csb));167csb.cc = CSB_CC_FAULT_ADDRESS;168csb.ce = CSB_CE_TERMINATION;169csb.cs = 0;170csb.count = 0;171172/*173* NX operates and returns in BE format as defined CRB struct.174* So saves fault_storage_addr in BE as NX pastes in FIFO and175* expects user space to convert to CPU format.176*/177csb.address = crb->stamp.nx.fault_storage_addr;178csb.flags = 0;179180/*181* Process closes send window after all pending NX requests are182* completed. In multi-thread applications, a child thread can183* open a window and can exit without closing it. May be some184* requests are pending or this window can be used by other185* threads later. We should handle faults if NX encounters186* pages faults on these requests. Update CSB with translation187* error and fault address. If csb_addr passed by user space is188* invalid, send SEGV signal to pid saved in window. If the189* child thread is not running, send the signal to tgid.190* Parent thread (tgid) will close this window upon its exit.191*192* pid and mm references are taken when window is opened by193* process (pid). So tgid is used only when child thread opens194* a window and exits without closing it.195*/196197if (!ref_get_pid_and_task(task_ref, &tsk, &pid))198return;199200kthread_use_mm(task_ref->mm);201rc = copy_to_user(csb_addr, &csb, sizeof(csb));202/*203* User space polls on csb.flags (first byte). So add barrier204* then copy first byte with csb flags update.205*/206if (!rc) {207csb.flags = CSB_V;208/* Make sure update to csb.flags is visible now */209smp_mb();210rc = copy_to_user(csb_addr, &csb, sizeof(u8));211}212kthread_unuse_mm(task_ref->mm);213put_task_struct(tsk);214215/* Success */216if (!rc)217return;218219220pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",221csb_addr, pid_vnr(pid));222223clear_siginfo(&info);224info.si_signo = SIGSEGV;225info.si_errno = EFAULT;226info.si_code = SEGV_MAPERR;227info.si_addr = csb_addr;228/*229* process will be polling on csb.flags after request is sent to230* NX. So generally CSB update should not fail except when an231* application passes invalid csb_addr. So an error message will232* be displayed and leave it to user space whether to ignore or233* handle this signal.234*/235rcu_read_lock();236rc = kill_pid_info(SIGSEGV, &info, pid);237rcu_read_unlock();238239pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc);240}241242void vas_dump_crb(struct coprocessor_request_block *crb)243{244struct data_descriptor_entry *dde;245struct nx_fault_stamp *nx;246247dde = &crb->source;248pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",249be64_to_cpu(dde->address), be32_to_cpu(dde->length),250dde->count, dde->index, dde->flags);251252dde = &crb->target;253pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",254be64_to_cpu(dde->address), be32_to_cpu(dde->length),255dde->count, dde->index, dde->flags);256257nx = &crb->stamp.nx;258pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",259be32_to_cpu(nx->pswid),260be64_to_cpu(crb->stamp.nx.fault_storage_addr),261nx->flags, nx->fault_status);262}263264static int coproc_open(struct inode *inode, struct file *fp)265{266struct coproc_instance *cp_inst;267268cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);269if (!cp_inst)270return -ENOMEM;271272cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,273cdev);274fp->private_data = cp_inst;275276return 0;277}278279static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)280{281void __user *uptr = (void __user *)arg;282struct vas_tx_win_open_attr uattr;283struct coproc_instance *cp_inst;284struct vas_window *txwin;285int rc;286287cp_inst = fp->private_data;288289/*290* One window for file descriptor291*/292if (cp_inst->txwin)293return -EEXIST;294295rc = copy_from_user(&uattr, uptr, sizeof(uattr));296if (rc) {297pr_err("copy_from_user() returns %d\n", rc);298return -EFAULT;299}300301if (uattr.version != 1) {302pr_err("Invalid window open API version\n");303return -EINVAL;304}305306if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {307pr_err("VAS API is not registered\n");308return -EACCES;309}310311txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,312cp_inst->coproc->cop_type);313if (IS_ERR(txwin)) {314pr_err_ratelimited("VAS window open failed rc=%ld\n",315PTR_ERR(txwin));316return PTR_ERR(txwin);317}318319mutex_init(&txwin->task_ref.mmap_mutex);320cp_inst->txwin = txwin;321322return 0;323}324325static int coproc_release(struct inode *inode, struct file *fp)326{327struct coproc_instance *cp_inst = fp->private_data;328int rc;329330if (cp_inst->txwin) {331if (cp_inst->coproc->vops &&332cp_inst->coproc->vops->close_win) {333rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);334if (rc)335return rc;336}337cp_inst->txwin = NULL;338}339340kfree(cp_inst);341fp->private_data = NULL;342343/*344* We don't know here if user has other receive windows345* open, so we can't really call clear_thread_tidr().346* So, once the process calls set_thread_tidr(), the347* TIDR value sticks around until process exits, resulting348* in an extra copy in restore_sprs().349*/350351return 0;352}353354/*355* If the executed instruction that caused the fault was a paste, then356* clear regs CR0[EQ], advance NIP, and return 0. Else return error code.357*/358static int do_fail_paste(void)359{360struct pt_regs *regs = current->thread.regs;361u32 instword;362363if (WARN_ON_ONCE(!regs))364return -EINVAL;365366if (WARN_ON_ONCE(!user_mode(regs)))367return -EINVAL;368369/*370* If we couldn't translate the instruction, the driver should371* return success without handling the fault, it will be retried372* or the instruction fetch will fault.373*/374if (get_user(instword, (u32 __user *)(regs->nip)))375return -EAGAIN;376377/*378* Not a paste instruction, driver may fail the fault.379*/380if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)381return -ENOENT;382383regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */384regs_add_return_ip(regs, 4); /* Emulate the paste */385386return 0;387}388389/*390* This fault handler is invoked when the core generates page fault on391* the paste address. Happens if the kernel closes window in hypervisor392* (on pseries) due to lost credit or the paste address is not mapped.393*/394static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)395{396struct vm_area_struct *vma = vmf->vma;397struct file *fp = vma->vm_file;398struct coproc_instance *cp_inst = fp->private_data;399struct vas_window *txwin;400vm_fault_t fault;401u64 paste_addr;402int ret;403404/*405* window is not opened. Shouldn't expect this error.406*/407if (!cp_inst || !cp_inst->txwin) {408pr_err("Unexpected fault on paste address with TX window closed\n");409return VM_FAULT_SIGBUS;410}411412txwin = cp_inst->txwin;413/*414* When the LPAR lost credits due to core removal or during415* migration, invalidate the existing mapping for the current416* paste addresses and set windows in-active (zap_vma_pages in417* reconfig_close_windows()).418* New mapping will be done later after migration or new credits419* available. So continue to receive faults if the user space420* issue NX request.421*/422if (txwin->task_ref.vma != vmf->vma) {423pr_err("No previous mapping with paste address\n");424return VM_FAULT_SIGBUS;425}426427/*428* The window may be inactive due to lost credit (Ex: core429* removal with DLPAR). If the window is active again when430* the credit is available, map the new paste address at the431* window virtual address.432*/433scoped_guard(mutex, &txwin->task_ref.mmap_mutex) {434if (txwin->status == VAS_WIN_ACTIVE) {435paste_addr = cp_inst->coproc->vops->paste_addr(txwin);436if (paste_addr) {437fault = vmf_insert_pfn(vma, vma->vm_start,438(paste_addr >> PAGE_SHIFT));439return fault;440}441}442}443444/*445* Received this fault due to closing the actual window.446* It can happen during migration or lost credits.447* Since no mapping, return the paste instruction failure448* to the user space.449*/450ret = do_fail_paste();451/*452* The user space can retry several times until success (needed453* for migration) or should fallback to SW compression or454* manage with the existing open windows if available.455* Looking at sysfs interface, it can determine whether these456* failures are coming during migration or core removal:457* nr_used_credits > nr_total_credits when lost credits458*/459if (!ret || (ret == -EAGAIN))460return VM_FAULT_NOPAGE;461462return VM_FAULT_SIGBUS;463}464465/*466* During mmap() paste address, mapping VMA is saved in VAS window467* struct which is used to unmap during migration if the window is468* still open. But the user space can remove this mapping with469* munmap() before closing the window and the VMA address will470* be invalid. Set VAS window VMA to NULL in this function which471* is called before VMA free.472*/473static void vas_mmap_close(struct vm_area_struct *vma)474{475struct file *fp = vma->vm_file;476struct coproc_instance *cp_inst = fp->private_data;477struct vas_window *txwin;478479/* Should not happen */480if (!cp_inst || !cp_inst->txwin) {481pr_err("No attached VAS window for the paste address mmap\n");482return;483}484485txwin = cp_inst->txwin;486/*487* task_ref.vma is set in coproc_mmap() during mmap paste488* address. So it has to be the same VMA that is getting freed.489*/490if (WARN_ON(txwin->task_ref.vma != vma)) {491pr_err("Invalid paste address mmaping\n");492return;493}494495scoped_guard(mutex, &txwin->task_ref.mmap_mutex)496txwin->task_ref.vma = NULL;497}498499static const struct vm_operations_struct vas_vm_ops = {500.close = vas_mmap_close,501.fault = vas_mmap_fault,502};503504static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)505{506struct coproc_instance *cp_inst = fp->private_data;507struct vas_window *txwin;508unsigned long pfn;509u64 paste_addr;510pgprot_t prot;511int rc;512513txwin = cp_inst->txwin;514515if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {516pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n",517(vma->vm_end - vma->vm_start), PAGE_SIZE);518return -EINVAL;519}520521/*522* Map complete page to the paste address. So the user523* space should pass 0ULL to the offset parameter.524*/525if (vma->vm_pgoff) {526pr_debug("Page offset unsupported to map paste address\n");527return -EINVAL;528}529530/* Ensure instance has an open send window */531if (!txwin) {532pr_err("No send window open?\n");533return -EINVAL;534}535536if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {537pr_err("VAS API is not registered\n");538return -EACCES;539}540541/*542* The initial mmap is done after the window is opened543* with ioctl. But before mmap(), this window can be closed in544* the hypervisor due to lost credit (core removal on pseries).545* So if the window is not active, return mmap() failure with546* -EACCES and expects the user space reissue mmap() when it547* is active again or open new window when the credit is available.548* mmap_mutex protects the paste address mmap() with DLPAR549* close/open event and allows mmap() only when the window is550* active.551*/552guard(mutex)(&txwin->task_ref.mmap_mutex);553if (txwin->status != VAS_WIN_ACTIVE) {554pr_err("Window is not active\n");555return -EACCES;556}557558paste_addr = cp_inst->coproc->vops->paste_addr(txwin);559if (!paste_addr) {560pr_err("Window paste address failed\n");561return -EINVAL;562}563564pfn = paste_addr >> PAGE_SHIFT;565566/* flags, page_prot from cxl_mmap(), except we want cachable */567vm_flags_set(vma, VM_IO | VM_PFNMAP);568vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);569570prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);571572rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,573vma->vm_end - vma->vm_start, prot);574575pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr,576vma->vm_start, rc);577578txwin->task_ref.vma = vma;579vma->vm_ops = &vas_vm_ops;580581return rc;582}583584static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)585{586switch (cmd) {587case VAS_TX_WIN_OPEN:588return coproc_ioc_tx_win_open(fp, arg);589default:590return -EINVAL;591}592}593594static struct file_operations coproc_fops = {595.open = coproc_open,596.release = coproc_release,597.mmap = coproc_mmap,598.unlocked_ioctl = coproc_ioctl,599};600601/*602* Supporting only nx-gzip coprocessor type now, but this API code603* extended to other coprocessor types later.604*/605int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,606const char *name,607const struct vas_user_win_ops *vops)608{609int rc = -EINVAL;610dev_t devno;611612rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);613if (rc) {614pr_err("Unable to allocate coproc major number: %i\n", rc);615return rc;616}617618pr_devel("%s device allocated, dev [%i,%i]\n", name,619MAJOR(coproc_device.devt), MINOR(coproc_device.devt));620621coproc_device.class = class_create(name);622if (IS_ERR(coproc_device.class)) {623rc = PTR_ERR(coproc_device.class);624pr_err("Unable to create %s class %d\n", name, rc);625goto err_class;626}627coproc_device.class->devnode = coproc_devnode;628coproc_device.cop_type = cop_type;629coproc_device.vops = vops;630631coproc_fops.owner = mod;632cdev_init(&coproc_device.cdev, &coproc_fops);633634devno = MKDEV(MAJOR(coproc_device.devt), 0);635rc = cdev_add(&coproc_device.cdev, devno, 1);636if (rc) {637pr_err("cdev_add() failed %d\n", rc);638goto err_cdev;639}640641coproc_device.device = device_create(coproc_device.class, NULL,642devno, NULL, name, MINOR(devno));643if (IS_ERR(coproc_device.device)) {644rc = PTR_ERR(coproc_device.device);645pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);646goto err;647}648649pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno));650651return 0;652653err:654cdev_del(&coproc_device.cdev);655err_cdev:656class_destroy(coproc_device.class);657err_class:658unregister_chrdev_region(coproc_device.devt, 1);659return rc;660}661662void vas_unregister_coproc_api(void)663{664dev_t devno;665666cdev_del(&coproc_device.cdev);667devno = MKDEV(MAJOR(coproc_device.devt), 0);668device_destroy(coproc_device.class, devno);669670class_destroy(coproc_device.class);671unregister_chrdev_region(coproc_device.devt, 1);672}673674675