Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_release(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = kfd_open,
	.release = kfd_release,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
struct device *kfd_device;
static const struct class kfd_class = {
	.name = kfd_dev_name,
};

static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, gpu_id);

	if (pdd)
		return pdd;

	mutex_unlock(&p->mutex);
	return NULL;
}

static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
{
	mutex_unlock(&pdd->process->mutex);
}

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	err = class_register(&kfd_class);
	if (err)
		goto err_class_create;

	kfd_device = device_create(&kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_unregister(&kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_unregister(&kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
	kfd_device = NULL;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_process_init_cwsr_apu(process, filep)) {
		kfd_unref_process(process);
		return -EFAULT;
	}

	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

	dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
		process->lead_thread->pid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_release(struct inode *inode, struct file *filep)
{
	struct kfd_process *process = filep->private_data;

	if (process)
		kfd_unref_process(process);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok((const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok((const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok((const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->is_gws = false;
	q_properties->queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	q_properties->read_ptr = (void __user *)args->read_pointer_address;
	q_properties->write_ptr = (void __user *)args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	q_properties->sdma_engine_id = args->sdma_engine_id;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_node *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;
	uint32_t doorbell_offset_in_process = 0;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
				      kfd_get_num_xgmi_sdma_engines(dev) - 1;

		if (q_properties.sdma_engine_id > max_sdma_eng_id) {
			err = -EINVAL;
			pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
			       q_properties.sdma_engine_id, max_sdma_eng_id);
			goto err_sdma_engine_id;
		}
	}

	if (!pdd->qpd.proc_doorbells) {
		err = kfd_alloc_process_doorbells(dev->kfd, pdd);
		if (err) {
			pr_debug("failed to allocate process doorbells\n");
			goto err_bind_process;
		}
	}

	err = kfd_queue_acquire_buffers(pdd, &q_properties);
	if (err) {
		pr_debug("failed to acquire user queue buffers\n");
		goto err_acquire_queue_buf;
	}

	pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
			p->lead_thread->pid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
			NULL, NULL, NULL, &doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	if (KFD_IS_SOC15(dev))
		/* On SOC15 ASICs, include the doorbell offset within the
		 * process doorbell frame, which is 2 pages.
		 */
		args->doorbell_offset |= doorbell_offset_in_process;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
	return 0;

err_create_queue:
	kfd_queue_unref_bo_vas(pdd, &q_properties);
	kfd_queue_release_buffers(pdd, &q_properties);
err_acquire_queue_buf:
err_sdma_engine_id:
err_bind_process:
err_pdd:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for process pid %d\n",
				args->queue_id,
				p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for process pid %d\n",
			args->queue_id, p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct mqd_update_info minfo = {0};
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	minfo.cu_mask.count = args->num_cu_mask;
	if (minfo.cu_mask.count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (minfo.cu_mask.count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		minfo.cu_mask.count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!minfo.cu_mask.ptr)
		return -ENOMEM;

	retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		retval = -EFAULT;
		goto out;
	}

	mutex_lock(&p->mutex);

	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);

	mutex_unlock(&p->mutex);

out:
	kfree(minfo.cu_mask.ptr);
	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			(void __user *)args->ctl_stack_address,
			&args->ctl_stack_used_size,
			&args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		? cache_policy_coherent : cache_policy_noncoherent;
	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size,
				args->misc_process_flag))
		err = -EINVAL;

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_address_watch(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	mutex_unlock(&p->mutex);
	if (pdd)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. 
Using raw monotonic time */690args->cpu_clock_counter = ktime_get_raw_ns();691args->system_clock_counter = ktime_get_boottime_ns();692693/* Since the counter is in nano-seconds we use 1GHz frequency */694args->system_clock_freq = 1000000000;695696return 0;697}698699700static int kfd_ioctl_get_process_apertures(struct file *filp,701struct kfd_process *p, void *data)702{703struct kfd_ioctl_get_process_apertures_args *args = data;704struct kfd_process_device_apertures *pAperture;705int i;706707dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);708709args->num_of_nodes = 0;710711mutex_lock(&p->mutex);712/* Run over all pdd of the process */713for (i = 0; i < p->n_pdds; i++) {714struct kfd_process_device *pdd = p->pdds[i];715716pAperture =717&args->process_apertures[args->num_of_nodes];718pAperture->gpu_id = pdd->dev->id;719pAperture->lds_base = pdd->lds_base;720pAperture->lds_limit = pdd->lds_limit;721pAperture->gpuvm_base = pdd->gpuvm_base;722pAperture->gpuvm_limit = pdd->gpuvm_limit;723pAperture->scratch_base = pdd->scratch_base;724pAperture->scratch_limit = pdd->scratch_limit;725726dev_dbg(kfd_device,727"node id %u\n", args->num_of_nodes);728dev_dbg(kfd_device,729"gpu id %u\n", pdd->dev->id);730dev_dbg(kfd_device,731"lds_base %llX\n", pdd->lds_base);732dev_dbg(kfd_device,733"lds_limit %llX\n", pdd->lds_limit);734dev_dbg(kfd_device,735"gpuvm_base %llX\n", pdd->gpuvm_base);736dev_dbg(kfd_device,737"gpuvm_limit %llX\n", pdd->gpuvm_limit);738dev_dbg(kfd_device,739"scratch_base %llX\n", pdd->scratch_base);740dev_dbg(kfd_device,741"scratch_limit %llX\n", pdd->scratch_limit);742743if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)744break;745}746mutex_unlock(&p->mutex);747748return 0;749}750751static int kfd_ioctl_get_process_apertures_new(struct file *filp,752struct kfd_process *p, void *data)753{754struct kfd_ioctl_get_process_apertures_new_args *args = data;755struct kfd_process_device_apertures *pa;756int ret;757int i;758759dev_dbg(kfd_device, "get apertures for process pid %d",760p->lead_thread->pid);761762if (args->num_of_nodes == 0) {763/* Return number of nodes, so that user space can alloacate764* sufficient memory765*/766mutex_lock(&p->mutex);767args->num_of_nodes = p->n_pdds;768goto out_unlock;769}770771/* Fill in process-aperture information for all available772* nodes, but not more than args->num_of_nodes as that is773* the amount of memory allocated by user774*/775pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),776GFP_KERNEL);777if (!pa)778return -ENOMEM;779780mutex_lock(&p->mutex);781782if (!p->n_pdds) {783args->num_of_nodes = 0;784kfree(pa);785goto out_unlock;786}787788/* Run over all pdd of the process */789for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {790struct kfd_process_device *pdd = p->pdds[i];791792pa[i].gpu_id = pdd->dev->id;793pa[i].lds_base = pdd->lds_base;794pa[i].lds_limit = pdd->lds_limit;795pa[i].gpuvm_base = pdd->gpuvm_base;796pa[i].gpuvm_limit = pdd->gpuvm_limit;797pa[i].scratch_base = pdd->scratch_base;798pa[i].scratch_limit = pdd->scratch_limit;799800dev_dbg(kfd_device,801"gpu id %u\n", pdd->dev->id);802dev_dbg(kfd_device,803"lds_base %llX\n", pdd->lds_base);804dev_dbg(kfd_device,805"lds_limit %llX\n", pdd->lds_limit);806dev_dbg(kfd_device,807"gpuvm_base %llX\n", pdd->gpuvm_base);808dev_dbg(kfd_device,809"gpuvm_limit %llX\n", pdd->gpuvm_limit);810dev_dbg(kfd_device,811"scratch_base %llX\n", pdd->scratch_base);812dev_dbg(kfd_device,813"scratch_limit %llX\n", 
pdd->scratch_limit);814}815mutex_unlock(&p->mutex);816817args->num_of_nodes = i;818ret = copy_to_user(819(void __user *)args->kfd_process_device_apertures_ptr,820pa,821(i * sizeof(struct kfd_process_device_apertures)));822kfree(pa);823return ret ? -EFAULT : 0;824825out_unlock:826mutex_unlock(&p->mutex);827return 0;828}829830static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,831void *data)832{833struct kfd_ioctl_create_event_args *args = data;834int err;835836/* For dGPUs the event page is allocated in user mode. The837* handle is passed to KFD with the first call to this IOCTL838* through the event_page_offset field.839*/840if (args->event_page_offset) {841mutex_lock(&p->mutex);842err = kfd_kmap_event_page(p, args->event_page_offset);843mutex_unlock(&p->mutex);844if (err)845return err;846}847848err = kfd_event_create(filp, p, args->event_type,849args->auto_reset != 0, args->node_id,850&args->event_id, &args->event_trigger_data,851&args->event_page_offset,852&args->event_slot_index);853854pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);855return err;856}857858static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,859void *data)860{861struct kfd_ioctl_destroy_event_args *args = data;862863return kfd_event_destroy(p, args->event_id);864}865866static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,867void *data)868{869struct kfd_ioctl_set_event_args *args = data;870871return kfd_set_event(p, args->event_id);872}873874static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,875void *data)876{877struct kfd_ioctl_reset_event_args *args = data;878879return kfd_reset_event(p, args->event_id);880}881882static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,883void *data)884{885struct kfd_ioctl_wait_events_args *args = data;886887return kfd_wait_on_events(p, args->num_events,888(void __user *)args->events_ptr,889(args->wait_for_all != 0),890&args->timeout, &args->wait_result);891}892static int kfd_ioctl_set_scratch_backing_va(struct file *filep,893struct kfd_process *p, void *data)894{895struct kfd_ioctl_set_scratch_backing_va_args *args = data;896struct kfd_process_device *pdd;897struct kfd_node *dev;898long err;899900mutex_lock(&p->mutex);901pdd = kfd_process_device_data_by_id(p, args->gpu_id);902if (!pdd) {903err = -EINVAL;904goto err_pdd;905}906dev = pdd->dev;907908pdd = kfd_bind_process_to_device(dev, p);909if (IS_ERR(pdd)) {910err = PTR_ERR(pdd);911goto bind_process_to_device_fail;912}913914pdd->qpd.sh_hidden_private_base = args->va_addr;915916mutex_unlock(&p->mutex);917918if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&919pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)920dev->kfd2kgd->set_scratch_backing_va(921dev->adev, args->va_addr, pdd->qpd.vmid);922923return 0;924925bind_process_to_device_fail:926err_pdd:927mutex_unlock(&p->mutex);928return err;929}930931static int kfd_ioctl_get_tile_config(struct file *filep,932struct kfd_process *p, void *data)933{934struct kfd_ioctl_get_tile_config_args *args = data;935struct kfd_process_device *pdd;936struct tile_config config;937int err = 0;938939mutex_lock(&p->mutex);940pdd = kfd_process_device_data_by_id(p, args->gpu_id);941mutex_unlock(&p->mutex);942if (!pdd)943return -EINVAL;944945amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);946947args->gb_addr_config = config.gb_addr_config;948args->num_banks = config.num_banks;949args->num_ranks = config.num_ranks;950951if (args->num_tile_configs > 
config.num_tile_configs)952args->num_tile_configs = config.num_tile_configs;953err = copy_to_user((void __user *)args->tile_config_ptr,954config.tile_config_ptr,955args->num_tile_configs * sizeof(uint32_t));956if (err) {957args->num_tile_configs = 0;958return -EFAULT;959}960961if (args->num_macro_tile_configs > config.num_macro_tile_configs)962args->num_macro_tile_configs =963config.num_macro_tile_configs;964err = copy_to_user((void __user *)args->macro_tile_config_ptr,965config.macro_tile_config_ptr,966args->num_macro_tile_configs * sizeof(uint32_t));967if (err) {968args->num_macro_tile_configs = 0;969return -EFAULT;970}971972return 0;973}974975static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,976void *data)977{978struct kfd_ioctl_acquire_vm_args *args = data;979struct kfd_process_device *pdd;980struct file *drm_file;981int ret;982983drm_file = fget(args->drm_fd);984if (!drm_file)985return -EINVAL;986987mutex_lock(&p->mutex);988pdd = kfd_process_device_data_by_id(p, args->gpu_id);989if (!pdd) {990ret = -EINVAL;991goto err_pdd;992}993994if (pdd->drm_file) {995ret = pdd->drm_file == drm_file ? 0 : -EBUSY;996goto err_drm_file;997}998999ret = kfd_process_device_init_vm(pdd, drm_file);1000if (ret)1001goto err_unlock;10021003/* On success, the PDD keeps the drm_file reference */1004mutex_unlock(&p->mutex);10051006return 0;10071008err_unlock:1009err_pdd:1010err_drm_file:1011mutex_unlock(&p->mutex);1012fput(drm_file);1013return ret;1014}10151016bool kfd_dev_is_large_bar(struct kfd_node *dev)1017{1018if (dev->kfd->adev->debug_largebar) {1019pr_debug("Simulate large-bar allocation on non large-bar machine\n");1020return true;1021}10221023if (dev->local_mem_info.local_mem_size_private == 0 &&1024dev->local_mem_info.local_mem_size_public > 0)1025return true;10261027if (dev->local_mem_info.local_mem_size_public == 0 &&1028dev->kfd->adev->gmc.is_app_apu) {1029pr_debug("APP APU, Consider like a large bar system\n");1030return true;1031}10321033return false;1034}10351036static int kfd_ioctl_get_available_memory(struct file *filep,1037struct kfd_process *p, void *data)1038{1039struct kfd_ioctl_get_available_memory_args *args = data;1040struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);10411042if (!pdd)1043return -EINVAL;1044args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,1045pdd->dev->node_id);1046kfd_unlock_pdd(pdd);1047return 0;1048}10491050static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,1051struct kfd_process *p, void *data)1052{1053struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;1054struct kfd_process_device *pdd;1055void *mem;1056struct kfd_node *dev;1057int idr_handle;1058long err;1059uint64_t offset = args->mmap_offset;1060uint32_t flags = args->flags;10611062if (args->size == 0)1063return -EINVAL;10641065#if IS_ENABLED(CONFIG_HSA_AMD_SVM)1066/* Flush pending deferred work to avoid racing with deferred actions1067* from previous memory map changes (e.g. 
munmap).1068*/1069svm_range_list_lock_and_flush_work(&p->svms, current->mm);1070mutex_lock(&p->svms.lock);1071mmap_write_unlock(current->mm);1072if (interval_tree_iter_first(&p->svms.objects,1073args->va_addr >> PAGE_SHIFT,1074(args->va_addr + args->size - 1) >> PAGE_SHIFT)) {1075pr_err("Address: 0x%llx already allocated by SVM\n",1076args->va_addr);1077mutex_unlock(&p->svms.lock);1078return -EADDRINUSE;1079}10801081/* When register user buffer check if it has been registered by svm by1082* buffer cpu virtual address.1083*/1084if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&1085interval_tree_iter_first(&p->svms.objects,1086args->mmap_offset >> PAGE_SHIFT,1087(args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {1088pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",1089args->mmap_offset);1090mutex_unlock(&p->svms.lock);1091return -EADDRINUSE;1092}10931094mutex_unlock(&p->svms.lock);1095#endif1096mutex_lock(&p->mutex);1097pdd = kfd_process_device_data_by_id(p, args->gpu_id);1098if (!pdd) {1099err = -EINVAL;1100goto err_pdd;1101}11021103dev = pdd->dev;11041105if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&1106(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&1107!kfd_dev_is_large_bar(dev)) {1108pr_err("Alloc host visible vram on small bar is not allowed\n");1109err = -EINVAL;1110goto err_large_bar;1111}11121113pdd = kfd_bind_process_to_device(dev, p);1114if (IS_ERR(pdd)) {1115err = PTR_ERR(pdd);1116goto err_unlock;1117}11181119if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {1120if (args->size != kfd_doorbell_process_slice(dev->kfd)) {1121err = -EINVAL;1122goto err_unlock;1123}1124offset = kfd_get_process_doorbells(pdd);1125if (!offset) {1126err = -ENOMEM;1127goto err_unlock;1128}1129} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {1130if (args->size != PAGE_SIZE) {1131err = -EINVAL;1132goto err_unlock;1133}1134offset = dev->adev->rmmio_remap.bus_addr;1135if (!offset || (PAGE_SIZE > 4096)) {1136err = -ENOMEM;1137goto err_unlock;1138}1139}11401141err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(1142dev->adev, args->va_addr, args->size,1143pdd->drm_priv, (struct kgd_mem **) &mem, &offset,1144flags, false);11451146if (err)1147goto err_unlock;11481149idr_handle = kfd_process_device_create_obj_handle(pdd, mem);1150if (idr_handle < 0) {1151err = -EFAULT;1152goto err_free;1153}11541155/* Update the VRAM usage count */1156if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {1157uint64_t size = args->size;11581159if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)1160size >>= 1;1161atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);1162}11631164mutex_unlock(&p->mutex);11651166args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);1167args->mmap_offset = offset;11681169/* MMIO is mapped through kfd device1170* Generate a kfd mmap offset1171*/1172if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)1173args->mmap_offset = KFD_MMAP_TYPE_MMIO1174| KFD_MMAP_GPU_ID(args->gpu_id);11751176return 0;11771178err_free:1179amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,1180pdd->drm_priv, NULL);1181err_unlock:1182err_pdd:1183err_large_bar:1184mutex_unlock(&p->mutex);1185return err;1186}11871188static int kfd_ioctl_free_memory_of_gpu(struct file *filep,1189struct kfd_process *p, void *data)1190{1191struct kfd_ioctl_free_memory_of_gpu_args *args = data;1192struct kfd_process_device *pdd;1193void *mem;1194int ret;1195uint64_t size = 0;11961197mutex_lock(&p->mutex);1198/*1199* Safeguard to prevent user space from freeing signal BO.1200* It will be freed at process termination.1201*/1202if (p->signal_handle 
&& (p->signal_handle == args->handle)) {1203pr_err("Free signal BO is not allowed\n");1204ret = -EPERM;1205goto err_unlock;1206}12071208pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1209if (!pdd) {1210pr_err("Process device data doesn't exist\n");1211ret = -EINVAL;1212goto err_pdd;1213}12141215mem = kfd_process_device_translate_handle(1216pdd, GET_IDR_HANDLE(args->handle));1217if (!mem) {1218ret = -EINVAL;1219goto err_unlock;1220}12211222ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,1223(struct kgd_mem *)mem, pdd->drm_priv, &size);12241225/* If freeing the buffer failed, leave the handle in place for1226* clean-up during process tear-down.1227*/1228if (!ret)1229kfd_process_device_remove_obj_handle(1230pdd, GET_IDR_HANDLE(args->handle));12311232atomic64_sub(size, &pdd->vram_usage);12331234err_unlock:1235err_pdd:1236mutex_unlock(&p->mutex);1237return ret;1238}12391240static int kfd_ioctl_map_memory_to_gpu(struct file *filep,1241struct kfd_process *p, void *data)1242{1243struct kfd_ioctl_map_memory_to_gpu_args *args = data;1244struct kfd_process_device *pdd, *peer_pdd;1245void *mem;1246struct kfd_node *dev;1247long err = 0;1248int i;1249uint32_t *devices_arr = NULL;12501251if (!args->n_devices) {1252pr_debug("Device IDs array empty\n");1253return -EINVAL;1254}1255if (args->n_success > args->n_devices) {1256pr_debug("n_success exceeds n_devices\n");1257return -EINVAL;1258}12591260devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1261GFP_KERNEL);1262if (!devices_arr)1263return -ENOMEM;12641265err = copy_from_user(devices_arr,1266(void __user *)args->device_ids_array_ptr,1267args->n_devices * sizeof(*devices_arr));1268if (err != 0) {1269err = -EFAULT;1270goto copy_from_user_failed;1271}12721273mutex_lock(&p->mutex);1274pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1275if (!pdd) {1276err = -EINVAL;1277goto get_process_device_data_failed;1278}1279dev = pdd->dev;12801281pdd = kfd_bind_process_to_device(dev, p);1282if (IS_ERR(pdd)) {1283err = PTR_ERR(pdd);1284goto bind_process_to_device_failed;1285}12861287mem = kfd_process_device_translate_handle(pdd,1288GET_IDR_HANDLE(args->handle));1289if (!mem) {1290err = -ENOMEM;1291goto get_mem_obj_from_handle_failed;1292}12931294for (i = args->n_success; i < args->n_devices; i++) {1295peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1296if (!peer_pdd) {1297pr_debug("Getting device by id failed for 0x%x\n",1298devices_arr[i]);1299err = -EINVAL;1300goto get_mem_obj_from_handle_failed;1301}13021303peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);1304if (IS_ERR(peer_pdd)) {1305err = PTR_ERR(peer_pdd);1306goto get_mem_obj_from_handle_failed;1307}13081309err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(1310peer_pdd->dev->adev, (struct kgd_mem *)mem,1311peer_pdd->drm_priv);1312if (err) {1313struct pci_dev *pdev = peer_pdd->dev->adev->pdev;13141315dev_err(dev->adev->dev,1316"Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",1317pci_domain_nr(pdev->bus),1318pdev->bus->number,1319PCI_SLOT(pdev->devfn),1320PCI_FUNC(pdev->devfn),1321((struct kgd_mem *)mem)->domain);1322goto map_memory_to_gpu_failed;1323}1324args->n_success = i+1;1325}13261327err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);1328if (err) {1329pr_debug("Sync memory failed, wait interrupted by user signal\n");1330goto sync_memory_failed;1331}13321333mutex_unlock(&p->mutex);13341335/* Flush TLBs after waiting for the page table updates to complete */1336for (i = 0; i < args->n_devices; i++) 
{1337peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1338if (WARN_ON_ONCE(!peer_pdd))1339continue;1340kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);1341}1342kfree(devices_arr);13431344return err;13451346get_process_device_data_failed:1347bind_process_to_device_failed:1348get_mem_obj_from_handle_failed:1349map_memory_to_gpu_failed:1350sync_memory_failed:1351mutex_unlock(&p->mutex);1352copy_from_user_failed:1353kfree(devices_arr);13541355return err;1356}13571358static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,1359struct kfd_process *p, void *data)1360{1361struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;1362struct kfd_process_device *pdd, *peer_pdd;1363void *mem;1364long err = 0;1365uint32_t *devices_arr = NULL, i;1366bool flush_tlb;13671368if (!args->n_devices) {1369pr_debug("Device IDs array empty\n");1370return -EINVAL;1371}1372if (args->n_success > args->n_devices) {1373pr_debug("n_success exceeds n_devices\n");1374return -EINVAL;1375}13761377devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),1378GFP_KERNEL);1379if (!devices_arr)1380return -ENOMEM;13811382err = copy_from_user(devices_arr,1383(void __user *)args->device_ids_array_ptr,1384args->n_devices * sizeof(*devices_arr));1385if (err != 0) {1386err = -EFAULT;1387goto copy_from_user_failed;1388}13891390mutex_lock(&p->mutex);1391pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));1392if (!pdd) {1393err = -EINVAL;1394goto bind_process_to_device_failed;1395}13961397mem = kfd_process_device_translate_handle(pdd,1398GET_IDR_HANDLE(args->handle));1399if (!mem) {1400err = -ENOMEM;1401goto get_mem_obj_from_handle_failed;1402}14031404for (i = args->n_success; i < args->n_devices; i++) {1405peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1406if (!peer_pdd) {1407err = -EINVAL;1408goto get_mem_obj_from_handle_failed;1409}1410err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(1411peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);1412if (err) {1413pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);1414goto unmap_memory_from_gpu_failed;1415}1416args->n_success = i+1;1417}14181419flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);1420if (flush_tlb) {1421err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,1422(struct kgd_mem *) mem, true);1423if (err) {1424pr_debug("Sync memory failed, wait interrupted by user signal\n");1425goto sync_memory_failed;1426}1427}14281429/* Flush TLBs after waiting for the page table updates to complete */1430for (i = 0; i < args->n_devices; i++) {1431peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);1432if (WARN_ON_ONCE(!peer_pdd))1433continue;1434if (flush_tlb)1435kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);14361437/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */1438err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);1439if (err)1440goto sync_memory_failed;1441}14421443mutex_unlock(&p->mutex);14441445kfree(devices_arr);14461447return 0;14481449bind_process_to_device_failed:1450get_mem_obj_from_handle_failed:1451unmap_memory_from_gpu_failed:1452sync_memory_failed:1453mutex_unlock(&p->mutex);1454copy_from_user_failed:1455kfree(devices_arr);1456return err;1457}14581459static int kfd_ioctl_alloc_queue_gws(struct file *filep,1460struct kfd_process *p, void *data)1461{1462int retval;1463struct kfd_ioctl_alloc_queue_gws_args *args = data;1464struct queue *q;1465struct kfd_node *dev;14661467mutex_lock(&p->mutex);1468q = pqm_get_user_queue(&p->pqm, args->queue_id);14691470if (q) {1471dev = 
q->device;1472} else {1473retval = -EINVAL;1474goto out_unlock;1475}14761477if (!dev->gws) {1478retval = -ENODEV;1479goto out_unlock;1480}14811482if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {1483retval = -ENODEV;1484goto out_unlock;1485}14861487if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||1488kfd_dbg_has_cwsr_workaround(dev))) {1489retval = -EBUSY;1490goto out_unlock;1491}14921493retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);1494mutex_unlock(&p->mutex);14951496args->first_gws = 0;1497return retval;14981499out_unlock:1500mutex_unlock(&p->mutex);1501return retval;1502}15031504static int kfd_ioctl_get_dmabuf_info(struct file *filep,1505struct kfd_process *p, void *data)1506{1507struct kfd_ioctl_get_dmabuf_info_args *args = data;1508struct kfd_node *dev = NULL;1509struct amdgpu_device *dmabuf_adev;1510void *metadata_buffer = NULL;1511uint32_t flags;1512int8_t xcp_id;1513unsigned int i;1514int r;15151516/* Find a KFD GPU device that supports the get_dmabuf_info query */1517for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)1518if (dev && !kfd_devcgroup_check_permission(dev))1519break;1520if (!dev)1521return -EINVAL;15221523if (args->metadata_ptr) {1524metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);1525if (!metadata_buffer)1526return -ENOMEM;1527}15281529/* Get dmabuf info from KGD */1530r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,1531&dmabuf_adev, &args->size,1532metadata_buffer, args->metadata_size,1533&args->metadata_size, &flags, &xcp_id);1534if (r)1535goto exit;15361537if (xcp_id >= 0)1538args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;1539else1540args->gpu_id = dev->id;1541args->flags = flags;15421543/* Copy metadata buffer to user mode */1544if (metadata_buffer) {1545r = copy_to_user((void __user *)args->metadata_ptr,1546metadata_buffer, args->metadata_size);1547if (r != 0)1548r = -EFAULT;1549}15501551exit:1552kfree(metadata_buffer);15531554return r;1555}15561557static int kfd_ioctl_import_dmabuf(struct file *filep,1558struct kfd_process *p, void *data)1559{1560struct kfd_ioctl_import_dmabuf_args *args = data;1561struct kfd_process_device *pdd;1562int idr_handle;1563uint64_t size;1564void *mem;1565int r;15661567mutex_lock(&p->mutex);1568pdd = kfd_process_device_data_by_id(p, args->gpu_id);1569if (!pdd) {1570r = -EINVAL;1571goto err_unlock;1572}15731574pdd = kfd_bind_process_to_device(pdd->dev, p);1575if (IS_ERR(pdd)) {1576r = PTR_ERR(pdd);1577goto err_unlock;1578}15791580r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd,1581args->va_addr, pdd->drm_priv,1582(struct kgd_mem **)&mem, &size,1583NULL);1584if (r)1585goto err_unlock;15861587idr_handle = kfd_process_device_create_obj_handle(pdd, mem);1588if (idr_handle < 0) {1589r = -EFAULT;1590goto err_free;1591}15921593mutex_unlock(&p->mutex);15941595args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);15961597return 0;15981599err_free:1600amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,1601pdd->drm_priv, NULL);1602err_unlock:1603mutex_unlock(&p->mutex);1604return r;1605}16061607static int kfd_ioctl_export_dmabuf(struct file *filep,1608struct kfd_process *p, void *data)1609{1610struct kfd_ioctl_export_dmabuf_args *args = data;1611struct kfd_process_device *pdd;1612struct dma_buf *dmabuf;1613struct kfd_node *dev;1614void *mem;1615int ret = 0;16161617dev = kfd_device_by_id(GET_GPU_ID(args->handle));1618if (!dev)1619return -EINVAL;16201621mutex_lock(&p->mutex);16221623pdd = 
kfd_get_process_device_data(dev, p);1624if (!pdd) {1625ret = -EINVAL;1626goto err_unlock;1627}16281629mem = kfd_process_device_translate_handle(pdd,1630GET_IDR_HANDLE(args->handle));1631if (!mem) {1632ret = -EINVAL;1633goto err_unlock;1634}16351636ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1637mutex_unlock(&p->mutex);1638if (ret)1639goto err_out;16401641ret = dma_buf_fd(dmabuf, args->flags);1642if (ret < 0) {1643dma_buf_put(dmabuf);1644goto err_out;1645}1646/* dma_buf_fd assigns the reference count to the fd, no need to1647* put the reference here.1648*/1649args->dmabuf_fd = ret;16501651return 0;16521653err_unlock:1654mutex_unlock(&p->mutex);1655err_out:1656return ret;1657}16581659/* Handle requests for watching SMI events */1660static int kfd_ioctl_smi_events(struct file *filep,1661struct kfd_process *p, void *data)1662{1663struct kfd_ioctl_smi_events_args *args = data;1664struct kfd_process_device *pdd;16651666mutex_lock(&p->mutex);16671668pdd = kfd_process_device_data_by_id(p, args->gpuid);1669mutex_unlock(&p->mutex);1670if (!pdd)1671return -EINVAL;16721673return kfd_smi_event_open(pdd->dev, &args->anon_fd);1674}16751676#if IS_ENABLED(CONFIG_HSA_AMD_SVM)16771678static int kfd_ioctl_set_xnack_mode(struct file *filep,1679struct kfd_process *p, void *data)1680{1681struct kfd_ioctl_set_xnack_mode_args *args = data;1682int r = 0;16831684mutex_lock(&p->mutex);1685if (args->xnack_enabled >= 0) {1686if (!list_empty(&p->pqm.queues)) {1687pr_debug("Process has user queues running\n");1688r = -EBUSY;1689goto out_unlock;1690}16911692if (p->xnack_enabled == args->xnack_enabled)1693goto out_unlock;16941695if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {1696r = -EPERM;1697goto out_unlock;1698}16991700r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);1701} else {1702args->xnack_enabled = p->xnack_enabled;1703}17041705out_unlock:1706mutex_unlock(&p->mutex);17071708return r;1709}17101711static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1712{1713struct kfd_ioctl_svm_args *args = data;1714int r = 0;17151716pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",1717args->start_addr, args->size, args->op, args->nattr);17181719if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))1720return -EINVAL;1721if (!args->start_addr || !args->size)1722return -EINVAL;17231724r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,1725args->attrs);17261727return r;1728}1729#else1730static int kfd_ioctl_set_xnack_mode(struct file *filep,1731struct kfd_process *p, void *data)1732{1733return -EPERM;1734}1735static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)1736{1737return -EPERM;1738}1739#endif17401741static int criu_checkpoint_process(struct kfd_process *p,1742uint8_t __user *user_priv_data,1743uint64_t *priv_offset)1744{1745struct kfd_criu_process_priv_data process_priv;1746int ret;17471748memset(&process_priv, 0, sizeof(process_priv));17491750process_priv.version = KFD_CRIU_PRIV_VERSION;1751/* For CR, we don't consider negative xnack mode which is used for1752* querying without changing it, here 0 simply means disabled and 11753* means enabled so retry for finding a valid PTE.1754*/1755process_priv.xnack_mode = p->xnack_enabled ? 
1 : 0;17561757ret = copy_to_user(user_priv_data + *priv_offset,1758&process_priv, sizeof(process_priv));17591760if (ret) {1761pr_err("Failed to copy process information to user\n");1762ret = -EFAULT;1763}17641765*priv_offset += sizeof(process_priv);1766return ret;1767}17681769static int criu_checkpoint_devices(struct kfd_process *p,1770uint32_t num_devices,1771uint8_t __user *user_addr,1772uint8_t __user *user_priv_data,1773uint64_t *priv_offset)1774{1775struct kfd_criu_device_priv_data *device_priv = NULL;1776struct kfd_criu_device_bucket *device_buckets = NULL;1777int ret = 0, i;17781779device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);1780if (!device_buckets) {1781ret = -ENOMEM;1782goto exit;1783}17841785device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);1786if (!device_priv) {1787ret = -ENOMEM;1788goto exit;1789}17901791for (i = 0; i < num_devices; i++) {1792struct kfd_process_device *pdd = p->pdds[i];17931794device_buckets[i].user_gpu_id = pdd->user_gpu_id;1795device_buckets[i].actual_gpu_id = pdd->dev->id;17961797/*1798* priv_data does not contain useful information for now and is reserved for1799* future use, so we do not set its contents.1800*/1801}18021803ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));1804if (ret) {1805pr_err("Failed to copy device information to user\n");1806ret = -EFAULT;1807goto exit;1808}18091810ret = copy_to_user(user_priv_data + *priv_offset,1811device_priv,1812num_devices * sizeof(*device_priv));1813if (ret) {1814pr_err("Failed to copy device information to user\n");1815ret = -EFAULT;1816}1817*priv_offset += num_devices * sizeof(*device_priv);18181819exit:1820kvfree(device_buckets);1821kvfree(device_priv);1822return ret;1823}18241825static uint32_t get_process_num_bos(struct kfd_process *p)1826{1827uint32_t num_of_bos = 0;1828int i;18291830/* Run over all PDDs of the process */1831for (i = 0; i < p->n_pdds; i++) {1832struct kfd_process_device *pdd = p->pdds[i];1833void *mem;1834int id;18351836idr_for_each_entry(&pdd->alloc_idr, mem, id) {1837struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;18381839if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)1840num_of_bos++;1841}1842}1843return num_of_bos;1844}18451846static int criu_get_prime_handle(struct kgd_mem *mem,1847int flags, u32 *shared_fd,1848struct file **file)1849{1850struct dma_buf *dmabuf;1851int ret;18521853ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);1854if (ret) {1855pr_err("dmabuf export failed for the BO\n");1856return ret;1857}18581859ret = get_unused_fd_flags(flags);1860if (ret < 0) {1861pr_err("dmabuf create fd failed, ret:%d\n", ret);1862goto out_free_dmabuf;1863}18641865*shared_fd = ret;1866*file = dmabuf->file;1867return 0;18681869out_free_dmabuf:1870dma_buf_put(dmabuf);1871return ret;1872}18731874static void commit_files(struct file **files,1875struct kfd_criu_bo_bucket *bo_buckets,1876unsigned int count,1877int err)1878{1879while (count--) {1880struct file *file = files[count];18811882if (!file)1883continue;1884if (err) {1885fput(file);1886put_unused_fd(bo_buckets[count].dmabuf_fd);1887} else {1888fd_install(bo_buckets[count].dmabuf_fd, file);1889}1890}1891}18921893static int criu_checkpoint_bos(struct kfd_process *p,1894uint32_t num_bos,1895uint8_t __user *user_bos,1896uint8_t __user *user_priv_data,1897uint64_t *priv_offset)1898{1899struct kfd_criu_bo_bucket *bo_buckets;1900struct kfd_criu_bo_priv_data *bo_privs;1901struct file **files = NULL;1902int ret = 0, pdd_index, bo_index = 0, id;1903void 
*mem;19041905bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);1906if (!bo_buckets)1907return -ENOMEM;19081909bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);1910if (!bo_privs) {1911ret = -ENOMEM;1912goto exit;1913}19141915files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);1916if (!files) {1917ret = -ENOMEM;1918goto exit;1919}19201921for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {1922struct kfd_process_device *pdd = p->pdds[pdd_index];1923struct amdgpu_bo *dumper_bo;1924struct kgd_mem *kgd_mem;19251926idr_for_each_entry(&pdd->alloc_idr, mem, id) {1927struct kfd_criu_bo_bucket *bo_bucket;1928struct kfd_criu_bo_priv_data *bo_priv;1929int i, dev_idx = 0;19301931kgd_mem = (struct kgd_mem *)mem;1932dumper_bo = kgd_mem->bo;19331934/* Skip checkpointing BOs that are used for Trap handler1935* code and state. Currently, these BOs have a VA that1936* is less GPUVM Base1937*/1938if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)1939continue;19401941bo_bucket = &bo_buckets[bo_index];1942bo_priv = &bo_privs[bo_index];19431944bo_bucket->gpu_id = pdd->user_gpu_id;1945bo_bucket->addr = (uint64_t)kgd_mem->va;1946bo_bucket->size = amdgpu_bo_size(dumper_bo);1947bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;1948bo_priv->idr_handle = id;19491950if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {1951ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,1952&bo_priv->user_addr);1953if (ret) {1954pr_err("Failed to obtain user address for user-pointer bo\n");1955goto exit;1956}1957}1958if (bo_bucket->alloc_flags1959& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {1960ret = criu_get_prime_handle(kgd_mem,1961bo_bucket->alloc_flags &1962KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,1963&bo_bucket->dmabuf_fd, &files[bo_index]);1964if (ret)1965goto exit;1966} else {1967bo_bucket->dmabuf_fd = KFD_INVALID_FD;1968}19691970if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)1971bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |1972KFD_MMAP_GPU_ID(pdd->dev->id);1973else if (bo_bucket->alloc_flags &1974KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)1975bo_bucket->offset = KFD_MMAP_TYPE_MMIO |1976KFD_MMAP_GPU_ID(pdd->dev->id);1977else1978bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);19791980for (i = 0; i < p->n_pdds; i++) {1981if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))1982bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;1983}19841985pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"1986"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",1987bo_bucket->size,1988bo_bucket->addr,1989bo_bucket->offset,1990bo_bucket->gpu_id,1991bo_bucket->alloc_flags,1992bo_priv->idr_handle);1993bo_index++;1994}1995}19961997ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));1998if (ret) {1999pr_err("Failed to copy BO information to user\n");2000ret = -EFAULT;2001goto exit;2002}20032004ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));2005if (ret) {2006pr_err("Failed to copy BO priv information to user\n");2007ret = -EFAULT;2008goto exit;2009}20102011*priv_offset += num_bos * sizeof(*bo_privs);20122013exit:2014commit_files(files, bo_buckets, bo_index, ret);2015kvfree(files);2016kvfree(bo_buckets);2017kvfree(bo_privs);2018return ret;2019}20202021static int criu_get_process_object_info(struct kfd_process *p,2022uint32_t *num_devices,2023uint32_t *num_bos,2024uint32_t *num_objects,2025uint64_t *objs_priv_size)2026{2027uint64_t queues_priv_data_size, 
svm_priv_data_size, priv_size;2028uint32_t num_queues, num_events, num_svm_ranges;2029int ret;20302031*num_devices = p->n_pdds;2032*num_bos = get_process_num_bos(p);20332034ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);2035if (ret)2036return ret;20372038num_events = kfd_get_num_events(p);20392040svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);20412042*num_objects = num_queues + num_events + num_svm_ranges;20432044if (objs_priv_size) {2045priv_size = sizeof(struct kfd_criu_process_priv_data);2046priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);2047priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);2048priv_size += queues_priv_data_size;2049priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);2050priv_size += svm_priv_data_size;2051*objs_priv_size = priv_size;2052}2053return 0;2054}20552056static int criu_checkpoint(struct file *filep,2057struct kfd_process *p,2058struct kfd_ioctl_criu_args *args)2059{2060int ret;2061uint32_t num_devices, num_bos, num_objects;2062uint64_t priv_size, priv_offset = 0, bo_priv_offset;20632064if (!args->devices || !args->bos || !args->priv_data)2065return -EINVAL;20662067mutex_lock(&p->mutex);20682069if (!p->n_pdds) {2070pr_err("No pdd for given process\n");2071ret = -ENODEV;2072goto exit_unlock;2073}20742075/* Confirm all process queues are evicted */2076if (!p->queues_paused) {2077pr_err("Cannot dump process when queues are not in evicted state\n");2078/* CRIU plugin did not call op PROCESS_INFO before checkpointing */2079ret = -EINVAL;2080goto exit_unlock;2081}20822083ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);2084if (ret)2085goto exit_unlock;20862087if (num_devices != args->num_devices ||2088num_bos != args->num_bos ||2089num_objects != args->num_objects ||2090priv_size != args->priv_data_size) {20912092ret = -EINVAL;2093goto exit_unlock;2094}20952096/* each function will store private data inside priv_data and adjust priv_offset */2097ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);2098if (ret)2099goto exit_unlock;21002101ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,2102(uint8_t __user *)args->priv_data, &priv_offset);2103if (ret)2104goto exit_unlock;21052106/* Leave room for BOs in the private data. 
They need to be restored2107* before events, but we checkpoint them last to simplify the error2108* handling.2109*/2110bo_priv_offset = priv_offset;2111priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);21122113if (num_objects) {2114ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,2115&priv_offset);2116if (ret)2117goto exit_unlock;21182119ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,2120&priv_offset);2121if (ret)2122goto exit_unlock;21232124ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);2125if (ret)2126goto exit_unlock;2127}21282129/* This must be the last thing in this function that can fail.2130* Otherwise we leak dmabuf file descriptors.2131*/2132ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,2133(uint8_t __user *)args->priv_data, &bo_priv_offset);21342135exit_unlock:2136mutex_unlock(&p->mutex);2137if (ret)2138pr_err("Failed to dump CRIU ret:%d\n", ret);2139else2140pr_debug("CRIU dump ret:%d\n", ret);21412142return ret;2143}21442145static int criu_restore_process(struct kfd_process *p,2146struct kfd_ioctl_criu_args *args,2147uint64_t *priv_offset,2148uint64_t max_priv_data_size)2149{2150int ret = 0;2151struct kfd_criu_process_priv_data process_priv;21522153if (*priv_offset + sizeof(process_priv) > max_priv_data_size)2154return -EINVAL;21552156ret = copy_from_user(&process_priv,2157(void __user *)(args->priv_data + *priv_offset),2158sizeof(process_priv));2159if (ret) {2160pr_err("Failed to copy process private information from user\n");2161ret = -EFAULT;2162goto exit;2163}2164*priv_offset += sizeof(process_priv);21652166if (process_priv.version != KFD_CRIU_PRIV_VERSION) {2167pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",2168process_priv.version, KFD_CRIU_PRIV_VERSION);2169return -EINVAL;2170}21712172pr_debug("Setting XNACK mode\n");2173if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {2174pr_err("xnack mode cannot be set\n");2175ret = -EPERM;2176goto exit;2177} else {2178pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);2179p->xnack_enabled = process_priv.xnack_mode;2180}21812182exit:2183return ret;2184}21852186static int criu_restore_devices(struct kfd_process *p,2187struct kfd_ioctl_criu_args *args,2188uint64_t *priv_offset,2189uint64_t max_priv_data_size)2190{2191struct kfd_criu_device_bucket *device_buckets;2192struct kfd_criu_device_priv_data *device_privs;2193int ret = 0;2194uint32_t i;21952196if (args->num_devices != p->n_pdds)2197return -EINVAL;21982199if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)2200return -EINVAL;22012202device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);2203if (!device_buckets)2204return -ENOMEM;22052206ret = copy_from_user(device_buckets, (void __user *)args->devices,2207args->num_devices * sizeof(*device_buckets));2208if (ret) {2209pr_err("Failed to copy devices buckets from user\n");2210ret = -EFAULT;2211goto exit;2212}22132214for (i = 0; i < args->num_devices; i++) {2215struct kfd_node *dev;2216struct kfd_process_device *pdd;2217struct file *drm_file;22182219/* device private data is not currently used */22202221if (!device_buckets[i].user_gpu_id) {2222pr_err("Invalid user gpu_id\n");2223ret = -EINVAL;2224goto exit;2225}22262227dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);2228if (!dev) {2229pr_err("Failed to find device with gpu_id = %x\n",2230device_buckets[i].actual_gpu_id);2231ret = -EINVAL;2232goto 
exit;2233}22342235pdd = kfd_get_process_device_data(dev, p);2236if (!pdd) {2237pr_err("Failed to get pdd for gpu_id = %x\n",2238device_buckets[i].actual_gpu_id);2239ret = -EINVAL;2240goto exit;2241}2242pdd->user_gpu_id = device_buckets[i].user_gpu_id;22432244drm_file = fget(device_buckets[i].drm_fd);2245if (!drm_file) {2246pr_err("Invalid render node file descriptor sent from plugin (%d)\n",2247device_buckets[i].drm_fd);2248ret = -EINVAL;2249goto exit;2250}22512252if (pdd->drm_file) {2253ret = -EINVAL;2254goto exit;2255}22562257/* create the vm using render nodes for kfd pdd */2258if (kfd_process_device_init_vm(pdd, drm_file)) {2259pr_err("could not init vm for given pdd\n");2260/* On success, the PDD keeps the drm_file reference */2261fput(drm_file);2262ret = -EINVAL;2263goto exit;2264}2265/*2266* pdd now already has the vm bound to render node so below api won't create a new2267* exclusive kfd mapping but use existing one with renderDXXX but is still needed2268* for iommu v2 binding and runtime pm.2269*/2270pdd = kfd_bind_process_to_device(dev, p);2271if (IS_ERR(pdd)) {2272ret = PTR_ERR(pdd);2273goto exit;2274}22752276if (!pdd->qpd.proc_doorbells) {2277ret = kfd_alloc_process_doorbells(dev->kfd, pdd);2278if (ret)2279goto exit;2280}2281}22822283/*2284* We are not copying device private data from user as we are not using the data for now,2285* but we still adjust for its private data.2286*/2287*priv_offset += args->num_devices * sizeof(*device_privs);22882289exit:2290kfree(device_buckets);2291return ret;2292}22932294static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,2295struct kfd_criu_bo_bucket *bo_bucket,2296struct kfd_criu_bo_priv_data *bo_priv,2297struct kgd_mem **kgd_mem)2298{2299int idr_handle;2300int ret;2301const bool criu_resume = true;2302u64 offset;23032304if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {2305if (bo_bucket->size !=2306kfd_doorbell_process_slice(pdd->dev->kfd))2307return -EINVAL;23082309offset = kfd_get_process_doorbells(pdd);2310if (!offset)2311return -ENOMEM;2312} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {2313/* MMIO BOs need remapped bus address */2314if (bo_bucket->size != PAGE_SIZE) {2315pr_err("Invalid page size\n");2316return -EINVAL;2317}2318offset = pdd->dev->adev->rmmio_remap.bus_addr;2319if (!offset || (PAGE_SIZE > 4096)) {2320pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");2321return -ENOMEM;2322}2323} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {2324offset = bo_priv->user_addr;2325}2326/* Create the BO */2327ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,2328bo_bucket->size, pdd->drm_priv, kgd_mem,2329&offset, bo_bucket->alloc_flags, criu_resume);2330if (ret) {2331pr_err("Could not create the BO\n");2332return ret;2333}2334pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",2335bo_bucket->size, bo_bucket->addr, offset);23362337/* Restore previous IDR handle */2338pr_debug("Restoring old IDR handle for the BO");2339idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,2340bo_priv->idr_handle + 1, GFP_KERNEL);23412342if (idr_handle < 0) {2343pr_err("Could not allocate idr\n");2344amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,2345NULL);2346return -ENOMEM;2347}23482349if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)2350bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);2351if (bo_bucket->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {2352bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);2353} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {2354bo_bucket->restored_offset = offset;2355} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {2356bo_bucket->restored_offset = offset;2357/* Update the VRAM usage count */2358atomic64_add(bo_bucket->size, &pdd->vram_usage);2359}2360return 0;2361}23622363static int criu_restore_bo(struct kfd_process *p,2364struct kfd_criu_bo_bucket *bo_bucket,2365struct kfd_criu_bo_priv_data *bo_priv,2366struct file **file)2367{2368struct kfd_process_device *pdd;2369struct kgd_mem *kgd_mem;2370int ret;2371int j;23722373pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",2374bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,2375bo_priv->idr_handle);23762377pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);2378if (!pdd) {2379pr_err("Failed to get pdd\n");2380return -ENODEV;2381}23822383ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);2384if (ret)2385return ret;23862387/* now map these BOs to GPU/s */2388for (j = 0; j < p->n_pdds; j++) {2389struct kfd_node *peer;2390struct kfd_process_device *peer_pdd;23912392if (!bo_priv->mapped_gpuids[j])2393break;23942395peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);2396if (!peer_pdd)2397return -EINVAL;23982399peer = peer_pdd->dev;24002401peer_pdd = kfd_bind_process_to_device(peer, p);2402if (IS_ERR(peer_pdd))2403return PTR_ERR(peer_pdd);24042405ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,2406peer_pdd->drm_priv);2407if (ret) {2408pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);2409return ret;2410}2411}24122413pr_debug("map memory was successful for the BO\n");2414/* create the dmabuf object and export the bo */2415if (bo_bucket->alloc_flags2416& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {2417ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,2418&bo_bucket->dmabuf_fd, file);2419if (ret)2420return ret;2421} else {2422bo_bucket->dmabuf_fd = KFD_INVALID_FD;2423}24242425return 0;2426}24272428static int criu_restore_bos(struct kfd_process *p,2429struct kfd_ioctl_criu_args *args,2430uint64_t *priv_offset,2431uint64_t max_priv_data_size)2432{2433struct kfd_criu_bo_bucket *bo_buckets = NULL;2434struct kfd_criu_bo_priv_data *bo_privs = NULL;2435struct file **files = NULL;2436int ret = 0;2437uint32_t i = 0;24382439if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)2440return -EINVAL;24412442/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */2443amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);24442445bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);2446if (!bo_buckets)2447return -ENOMEM;24482449files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);2450if (!files) {2451ret = -ENOMEM;2452goto exit;2453}24542455ret = copy_from_user(bo_buckets, (void __user *)args->bos,2456args->num_bos * sizeof(*bo_buckets));2457if (ret) {2458pr_err("Failed to copy BOs information from user\n");2459ret = -EFAULT;2460goto exit;2461}24622463bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);2464if (!bo_privs) {2465ret = -ENOMEM;2466goto exit;2467}24682469ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,2470args->num_bos * sizeof(*bo_privs));2471if (ret) {2472pr_err("Failed to copy BOs information from 
user\n");2473ret = -EFAULT;2474goto exit;2475}2476*priv_offset += args->num_bos * sizeof(*bo_privs);24772478/* Create and map new BOs */2479for (; i < args->num_bos; i++) {2480ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);2481if (ret) {2482pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);2483goto exit;2484}2485} /* done */24862487/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */2488ret = copy_to_user((void __user *)args->bos,2489bo_buckets,2490(args->num_bos * sizeof(*bo_buckets)));2491if (ret)2492ret = -EFAULT;24932494exit:2495commit_files(files, bo_buckets, i, ret);2496kvfree(files);2497kvfree(bo_buckets);2498kvfree(bo_privs);2499return ret;2500}25012502static int criu_restore_objects(struct file *filep,2503struct kfd_process *p,2504struct kfd_ioctl_criu_args *args,2505uint64_t *priv_offset,2506uint64_t max_priv_data_size)2507{2508int ret = 0;2509uint32_t i;25102511BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));2512BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));2513BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));25142515for (i = 0; i < args->num_objects; i++) {2516uint32_t object_type;25172518if (*priv_offset + sizeof(object_type) > max_priv_data_size) {2519pr_err("Invalid private data size\n");2520return -EINVAL;2521}25222523ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));2524if (ret) {2525pr_err("Failed to copy private information from user\n");2526goto exit;2527}25282529switch (object_type) {2530case KFD_CRIU_OBJECT_TYPE_QUEUE:2531ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,2532priv_offset, max_priv_data_size);2533if (ret)2534goto exit;2535break;2536case KFD_CRIU_OBJECT_TYPE_EVENT:2537ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,2538priv_offset, max_priv_data_size);2539if (ret)2540goto exit;2541break;2542case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:2543ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,2544priv_offset, max_priv_data_size);2545if (ret)2546goto exit;2547break;2548default:2549pr_err("Invalid object type:%u at index:%d\n", object_type, i);2550ret = -EINVAL;2551goto exit;2552}2553}2554exit:2555return ret;2556}25572558static int criu_restore(struct file *filep,2559struct kfd_process *p,2560struct kfd_ioctl_criu_args *args)2561{2562uint64_t priv_offset = 0;2563int ret = 0;25642565pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",2566args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);25672568if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||2569!args->num_devices || !args->num_bos)2570return -EINVAL;25712572mutex_lock(&p->mutex);25732574/*2575* Set the process to evicted state to avoid running any new queues before all the memory2576* mappings are ready.2577*/2578ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);2579if (ret)2580goto exit_unlock;25812582/* Each function will adjust priv_offset based on how many bytes they consumed */2583ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);2584if (ret)2585goto exit_unlock;25862587ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);2588if (ret)2589goto exit_unlock;25902591ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);2592if (ret)2593goto exit_unlock;25942595ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);2596if (ret)2597goto 
exit_unlock;25982599if (priv_offset != args->priv_data_size) {2600pr_err("Invalid private data size\n");2601ret = -EINVAL;2602}26032604exit_unlock:2605mutex_unlock(&p->mutex);2606if (ret)2607pr_err("Failed to restore CRIU ret:%d\n", ret);2608else2609pr_debug("CRIU restore successful\n");26102611return ret;2612}26132614static int criu_unpause(struct file *filep,2615struct kfd_process *p,2616struct kfd_ioctl_criu_args *args)2617{2618int ret;26192620mutex_lock(&p->mutex);26212622if (!p->queues_paused) {2623mutex_unlock(&p->mutex);2624return -EINVAL;2625}26262627ret = kfd_process_restore_queues(p);2628if (ret)2629pr_err("Failed to unpause queues ret:%d\n", ret);2630else2631p->queues_paused = false;26322633mutex_unlock(&p->mutex);26342635return ret;2636}26372638static int criu_resume(struct file *filep,2639struct kfd_process *p,2640struct kfd_ioctl_criu_args *args)2641{2642struct kfd_process *target = NULL;2643struct pid *pid = NULL;2644int ret = 0;26452646pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,2647args->pid);26482649pid = find_get_pid(args->pid);2650if (!pid) {2651pr_err("Cannot find pid info for %i\n", args->pid);2652return -ESRCH;2653}26542655pr_debug("calling kfd_lookup_process_by_pid\n");2656target = kfd_lookup_process_by_pid(pid);26572658put_pid(pid);26592660if (!target) {2661pr_debug("Cannot find process info for %i\n", args->pid);2662return -ESRCH;2663}26642665mutex_lock(&target->mutex);2666ret = kfd_criu_resume_svm(target);2667if (ret) {2668pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);2669goto exit;2670}26712672ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);2673if (ret)2674pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);26752676exit:2677mutex_unlock(&target->mutex);26782679kfd_unref_process(target);2680return ret;2681}26822683static int criu_process_info(struct file *filep,2684struct kfd_process *p,2685struct kfd_ioctl_criu_args *args)2686{2687int ret = 0;26882689mutex_lock(&p->mutex);26902691if (!p->n_pdds) {2692pr_err("No pdd for given process\n");2693ret = -ENODEV;2694goto err_unlock;2695}26962697ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);2698if (ret)2699goto err_unlock;27002701p->queues_paused = true;27022703args->pid = task_pid_nr_ns(p->lead_thread,2704task_active_pid_ns(p->lead_thread));27052706ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,2707&args->num_objects, &args->priv_data_size);2708if (ret)2709goto err_unlock;27102711dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",2712args->num_devices, args->num_bos, args->num_objects,2713args->priv_data_size);27142715err_unlock:2716if (ret) {2717kfd_process_restore_queues(p);2718p->queues_paused = false;2719}2720mutex_unlock(&p->mutex);2721return ret;2722}27232724static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)2725{2726struct kfd_ioctl_criu_args *args = data;2727int ret;27282729dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);2730switch (args->op) {2731case KFD_CRIU_OP_PROCESS_INFO:2732ret = criu_process_info(filep, p, args);2733break;2734case KFD_CRIU_OP_CHECKPOINT:2735ret = criu_checkpoint(filep, p, args);2736break;2737case KFD_CRIU_OP_UNPAUSE:2738ret = criu_unpause(filep, p, args);2739break;2740case KFD_CRIU_OP_RESTORE:2741ret = criu_restore(filep, p, args);2742break;2743case KFD_CRIU_OP_RESUME:2744ret = criu_resume(filep, p, args);2745break;2746default:2747dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);2748ret = 
-EINVAL;2749break;2750}27512752if (ret)2753dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);27542755return ret;2756}27572758static int runtime_enable(struct kfd_process *p, uint64_t r_debug,2759bool enable_ttmp_setup)2760{2761int i = 0, ret = 0;27622763if (p->is_runtime_retry)2764goto retry;27652766if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)2767return -EBUSY;27682769for (i = 0; i < p->n_pdds; i++) {2770struct kfd_process_device *pdd = p->pdds[i];27712772if (pdd->qpd.queue_count)2773return -EEXIST;27742775/*2776* Setup TTMPs by default.2777* Note that this call must remain here for MES ADD QUEUE to2778* skip_process_ctx_clear unconditionally as the first call to2779* SET_SHADER_DEBUGGER clears any stale process context data2780* saved in MES.2781*/2782if (pdd->dev->kfd->shared_resources.enable_mes)2783kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));2784}27852786p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;2787p->runtime_info.r_debug = r_debug;2788p->runtime_info.ttmp_setup = enable_ttmp_setup;27892790if (p->runtime_info.ttmp_setup) {2791for (i = 0; i < p->n_pdds; i++) {2792struct kfd_process_device *pdd = p->pdds[i];27932794if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {2795amdgpu_gfx_off_ctrl(pdd->dev->adev, false);2796pdd->dev->kfd2kgd->enable_debug_trap(2797pdd->dev->adev,2798true,2799pdd->dev->vm_info.last_vmid_kfd);2800} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {2801pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(2802pdd->dev->adev,2803false,28040);2805}2806}2807}28082809retry:2810if (p->debug_trap_enabled) {2811if (!p->is_runtime_retry) {2812kfd_dbg_trap_activate(p);2813kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),2814p, NULL, 0, false, NULL, 0);2815}28162817mutex_unlock(&p->mutex);2818ret = down_interruptible(&p->runtime_enable_sema);2819mutex_lock(&p->mutex);28202821p->is_runtime_retry = !!ret;2822}28232824return ret;2825}28262827static int runtime_disable(struct kfd_process *p)2828{2829int i = 0, ret;2830bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;28312832p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;2833p->runtime_info.r_debug = 0;28342835if (p->debug_trap_enabled) {2836if (was_enabled)2837kfd_dbg_trap_deactivate(p, false, 0);28382839if (!p->is_runtime_retry)2840kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),2841p, NULL, 0, false, NULL, 0);28422843mutex_unlock(&p->mutex);2844ret = down_interruptible(&p->runtime_enable_sema);2845mutex_lock(&p->mutex);28462847p->is_runtime_retry = !!ret;2848if (ret)2849return ret;2850}28512852if (was_enabled && p->runtime_info.ttmp_setup) {2853for (i = 0; i < p->n_pdds; i++) {2854struct kfd_process_device *pdd = p->pdds[i];28552856if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))2857amdgpu_gfx_off_ctrl(pdd->dev->adev, true);2858}2859}28602861p->runtime_info.ttmp_setup = false;28622863/* disable ttmp setup */2864for (i = 0; i < p->n_pdds; i++) {2865struct kfd_process_device *pdd = p->pdds[i];28662867if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {2868pdd->spi_dbg_override =2869pdd->dev->kfd2kgd->disable_debug_trap(2870pdd->dev->adev,2871false,2872pdd->dev->vm_info.last_vmid_kfd);28732874if (!pdd->dev->kfd->shared_resources.enable_mes)2875debug_refresh_runlist(pdd->dev->dqm);2876else2877kfd_dbg_set_mes_debug_mode(pdd,2878!kfd_dbg_has_cwsr_workaround(pdd->dev));2879}2880}28812882return 0;2883}28842885static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void 
*data)2886{2887struct kfd_ioctl_runtime_enable_args *args = data;2888int r;28892890mutex_lock(&p->mutex);28912892if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)2893r = runtime_enable(p, args->r_debug,2894!!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK));2895else2896r = runtime_disable(p);28972898mutex_unlock(&p->mutex);28992900return r;2901}29022903static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)2904{2905struct kfd_ioctl_dbg_trap_args *args = data;2906struct task_struct *thread = NULL;2907struct mm_struct *mm = NULL;2908struct pid *pid = NULL;2909struct kfd_process *target = NULL;2910struct kfd_process_device *pdd = NULL;2911int r = 0;29122913if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {2914pr_err("Debugging does not support sched_policy %i", sched_policy);2915return -EINVAL;2916}29172918pid = find_get_pid(args->pid);2919if (!pid) {2920pr_debug("Cannot find pid info for %i\n", args->pid);2921r = -ESRCH;2922goto out;2923}29242925thread = get_pid_task(pid, PIDTYPE_PID);2926if (!thread) {2927r = -ESRCH;2928goto out;2929}29302931mm = get_task_mm(thread);2932if (!mm) {2933r = -ESRCH;2934goto out;2935}29362937if (args->op == KFD_IOC_DBG_TRAP_ENABLE) {2938bool create_process;29392940rcu_read_lock();2941create_process = thread && thread != current && ptrace_parent(thread) == current;2942rcu_read_unlock();29432944target = create_process ? kfd_create_process(thread) :2945kfd_lookup_process_by_pid(pid);2946} else {2947target = kfd_lookup_process_by_pid(pid);2948}29492950if (IS_ERR_OR_NULL(target)) {2951pr_debug("Cannot find process PID %i to debug\n", args->pid);2952r = target ? PTR_ERR(target) : -ESRCH;2953target = NULL;2954goto out;2955}29562957/* Check if target is still PTRACED. */2958rcu_read_lock();2959if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE2960&& ptrace_parent(target->lead_thread) != current) {2961pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);2962r = -EPERM;2963}2964rcu_read_unlock();29652966if (r)2967goto out;29682969mutex_lock(&target->mutex);29702971if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) {2972pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op);2973r = -EINVAL;2974goto unlock_out;2975}29762977if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&2978(args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||2979args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||2980args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||2981args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||2982args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||2983args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||2984args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {2985r = -EPERM;2986goto unlock_out;2987}29882989if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||2990args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {2991int user_gpu_id = kfd_process_get_user_gpu_id(target,2992args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?2993args->set_node_address_watch.gpu_id :2994args->clear_node_address_watch.gpu_id);29952996pdd = kfd_process_device_data_by_id(target, user_gpu_id);2997if (user_gpu_id == -EINVAL || !pdd) {2998r = -ENODEV;2999goto unlock_out;3000}3001}30023003switch (args->op) {3004case KFD_IOC_DBG_TRAP_ENABLE:3005if (target != p)3006target->debugger_process = p;30073008r = kfd_dbg_trap_enable(target,3009args->enable.dbg_fd,3010(void __user *)args->enable.rinfo_ptr,3011&args->enable.rinfo_size);3012if (!r)3013target->exception_enable_mask = 
args->enable.exception_mask;30143015break;3016case KFD_IOC_DBG_TRAP_DISABLE:3017r = kfd_dbg_trap_disable(target);3018break;3019case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:3020r = kfd_dbg_send_exception_to_runtime(target,3021args->send_runtime_event.gpu_id,3022args->send_runtime_event.queue_id,3023args->send_runtime_event.exception_mask);3024break;3025case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:3026kfd_dbg_set_enabled_debug_exception_mask(target,3027args->set_exceptions_enabled.exception_mask);3028break;3029case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:3030r = kfd_dbg_trap_set_wave_launch_override(target,3031args->launch_override.override_mode,3032args->launch_override.enable_mask,3033args->launch_override.support_request_mask,3034&args->launch_override.enable_mask,3035&args->launch_override.support_request_mask);3036break;3037case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:3038r = kfd_dbg_trap_set_wave_launch_mode(target,3039args->launch_mode.launch_mode);3040break;3041case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:3042r = suspend_queues(target,3043args->suspend_queues.num_queues,3044args->suspend_queues.grace_period,3045args->suspend_queues.exception_mask,3046(uint32_t *)args->suspend_queues.queue_array_ptr);30473048break;3049case KFD_IOC_DBG_TRAP_RESUME_QUEUES:3050r = resume_queues(target, args->resume_queues.num_queues,3051(uint32_t *)args->resume_queues.queue_array_ptr);3052break;3053case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:3054r = kfd_dbg_trap_set_dev_address_watch(pdd,3055args->set_node_address_watch.address,3056args->set_node_address_watch.mask,3057&args->set_node_address_watch.id,3058args->set_node_address_watch.mode);3059break;3060case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:3061r = kfd_dbg_trap_clear_dev_address_watch(pdd,3062args->clear_node_address_watch.id);3063break;3064case KFD_IOC_DBG_TRAP_SET_FLAGS:3065r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);3066break;3067case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:3068r = kfd_dbg_ev_query_debug_event(target,3069&args->query_debug_event.queue_id,3070&args->query_debug_event.gpu_id,3071args->query_debug_event.exception_mask,3072&args->query_debug_event.exception_mask);3073break;3074case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:3075r = kfd_dbg_trap_query_exception_info(target,3076args->query_exception_info.source_id,3077args->query_exception_info.exception_code,3078args->query_exception_info.clear_exception,3079(void __user *)args->query_exception_info.info_ptr,3080&args->query_exception_info.info_size);3081break;3082case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:3083r = pqm_get_queue_snapshot(&target->pqm,3084args->queue_snapshot.exception_mask,3085(void __user *)args->queue_snapshot.snapshot_buf_ptr,3086&args->queue_snapshot.num_queues,3087&args->queue_snapshot.entry_size);3088break;3089case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:3090r = kfd_dbg_trap_device_snapshot(target,3091args->device_snapshot.exception_mask,3092(void __user *)args->device_snapshot.snapshot_buf_ptr,3093&args->device_snapshot.num_devices,3094&args->device_snapshot.entry_size);3095break;3096default:3097pr_err("Invalid option: %i\n", args->op);3098r = -EINVAL;3099}31003101unlock_out:3102mutex_unlock(&target->mutex);31033104out:3105if (thread)3106put_task_struct(thread);31073108if (mm)3109mmput(mm);31103111if (pid)3112put_pid(pid);31133114if (target)3115kfd_unref_process(target);31163117return r;3118}31193120#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \3121[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \3122.cmd_drv = 0, .name = #ioctl}31233124/** Ioctl 
table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
			kfd_ioctl_get_dmabuf_info, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
			kfd_ioctl_import_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
			kfd_ioctl_alloc_queue_gws, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			kfd_ioctl_smi_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			kfd_ioctl_set_xnack_mode, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
			kfd_ioctl_get_available_memory, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
			kfd_ioctl_export_dmabuf, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
			kfd_ioctl_runtime_enable, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
			kfd_ioctl_set_debug_trap, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;
	bool ptrace_attached = false;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);

	/* Get the process struct from the filep. Only the process
	 * that opened /dev/kfd can use the file descriptor. Child
	 * processes need to create their own KFD device context.
	 */
	process = filep->private_data;

	rcu_read_lock();
	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    ptrace_parent(process->lead_thread) == current)
		ptrace_attached = true;
	rcu_read_unlock();

	if (process->lead_thread != current->group_leader
	    && !ptrace_attached) {
		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
		retcode = -EBADF;
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	/*
	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
	 * CAP_CHECKPOINT_RESTORE, so we also allow access with CAP_SYS_ADMIN,
	 * which is the more privileged capability.
	 */
	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
		if (!capable(CAP_CHECKPOINT_RESTORE) &&
		    !capable(CAP_SYS_ADMIN)) {
			retcode = -EACCES;
			goto err_i1;
		}
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
			nr, arg, retcode);

	return retcode;
}

static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
			 struct vm_area_struct *vma)
{
	phys_addr_t address;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	if (PAGE_SIZE > 4096)
		return -EINVAL;

	address = dev->adev->rmmio_remap.bus_addr;

	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
		     VM_DONTDUMP | VM_PFNMAP);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	pr_debug("process pid %d mapping mmio page\n"
		 " target user address == 0x%08llX\n"
		 " physical address    == 0x%08llX\n"
		 " vm_flags            == 0x%04lX\n"
		 " size                == 0x%04lX\n",
		 process->lead_thread->pid, (unsigned long long) vma->vm_start,
		 address, vma->vm_flags, PAGE_SIZE);

	return io_remap_pfn_range(vma,
				  vma->vm_start,
				  address >> PAGE_SHIFT,
				  PAGE_SIZE,
				  vma->vm_page_prot);
}


static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_node *dev = NULL;
	unsigned long mmap_offset;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	case KFD_MMAP_TYPE_MMIO:
		if (!dev)
			return -ENODEV;
		return kfd_mmio_mmap(dev, process, vma);
	}

	return -EFAULT;
}
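/*
 * Illustrative sketch (not part of the driver): how a user-space CRIU
 * plugin is expected to drive the AMDKFD_IOC_CRIU_OP state machine that
 * kfd_ioctl_criu() dispatches above. The real plugin lives in user space;
 * the field usage below is an assumption based on struct kfd_ioctl_criu_args
 * and the op handlers in this file, not a verbatim copy of that plugin.
 *
 *	struct kfd_ioctl_criu_args args = { .op = KFD_CRIU_OP_PROCESS_INFO };
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);	// evicts queues, returns counts and sizes
 *	// allocate args.num_devices/args.num_bos buckets plus args.priv_data_size
 *	// bytes, and point args.devices, args.bos and args.priv_data at them
 *	args.op = KFD_CRIU_OP_CHECKPOINT;
 *	ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);	// dumps devices, BOs, queues, events, SVM
 *	args.op = KFD_CRIU_OP_UNPAUSE;
 *	ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);	// restarts the checkpointed queues
 *
 * On the restore side the same ioctl is issued with KFD_CRIU_OP_RESTORE and
 * finally KFD_CRIU_OP_RESUME (with args.pid set), matching criu_restore()
 * and criu_resume() above.
 */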