Path: blob/master/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
26516 views
// SPDX-License-Identifier: GPL-2.0 OR MIT1/*2* Copyright 2014-2022 Advanced Micro Devices, Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*/22#include "kfd_priv.h"23#include <linux/mm.h>24#include <linux/mman.h>25#include <linux/slab.h>26#include <linux/io.h>27#include <linux/idr.h>2829/*30* This extension supports a kernel level doorbells management for the31* kernel queues using the first doorbell page reserved for the kernel.32*/3334/*35* Each device exposes a doorbell aperture, a PCI MMIO aperture that36* receives 32-bit writes that are passed to queues as wptr values.37* The doorbells are intended to be written by applications as part38* of queueing work on user-mode queues.39* We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.40* We map the doorbell address space into user-mode when a process creates41* its first queue on each device.42* Although the mapping is done by KFD, it is equivalent to an mmap of43* the /dev/kfd with the particular device encoded in the mmap offset.44* There will be other uses for mmap of /dev/kfd, so only a range of45* offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.46*/4748/* # of doorbell bytes allocated for each process. */49size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)50{51if (!kfd->shared_resources.enable_mes)52return roundup(kfd->device_info.doorbell_size *53KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,54PAGE_SIZE);55else56return amdgpu_mes_doorbell_process_slice(57(struct amdgpu_device *)kfd->adev);58}5960/* Doorbell calculations for device init. */61int kfd_doorbell_init(struct kfd_dev *kfd)62{63int size = PAGE_SIZE;64int r;6566/*67* Todo: KFD kernel level operations need only one doorbell for68* ring test/HWS. So instead of reserving a whole page here for69* kernel, reserve and consume a doorbell from existing KGD kernel70* doorbell page.71*/7273/* Bitmap to dynamically allocate doorbells from kernel page */74kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);75if (!kfd->doorbell_bitmap) {76DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");77return -ENOMEM;78}7980/* Alloc a doorbell page for KFD kernel usages */81r = amdgpu_bo_create_kernel(kfd->adev,82size,83PAGE_SIZE,84AMDGPU_GEM_DOMAIN_DOORBELL,85&kfd->doorbells,86NULL,87(void **)&kfd->doorbell_kernel_ptr);88if (r) {89pr_err("failed to allocate kernel doorbells\n");90bitmap_free(kfd->doorbell_bitmap);91return r;92}9394pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);95return 0;96}9798void kfd_doorbell_fini(struct kfd_dev *kfd)99{100bitmap_free(kfd->doorbell_bitmap);101amdgpu_bo_free_kernel(&kfd->doorbells, NULL,102(void **)&kfd->doorbell_kernel_ptr);103}104105int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,106struct vm_area_struct *vma)107{108phys_addr_t address;109struct kfd_process_device *pdd;110111/*112* For simplicitly we only allow mapping of the entire doorbell113* allocation of a single device & process.114*/115if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd))116return -EINVAL;117118pdd = kfd_get_process_device_data(dev, process);119if (!pdd)120return -EINVAL;121122/* Calculate physical address of doorbell */123address = kfd_get_process_doorbells(pdd);124if (!address)125return -ENOMEM;126vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |127VM_DONTDUMP | VM_PFNMAP);128129vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);130131pr_debug("Mapping doorbell page\n"132" target user address == 0x%08llX\n"133" physical address == 0x%08llX\n"134" vm_flags == 0x%04lX\n"135" size == 0x%04lX\n",136(unsigned long long) vma->vm_start, address, vma->vm_flags,137kfd_doorbell_process_slice(dev->kfd));138139140return io_remap_pfn_range(vma,141vma->vm_start,142address >> PAGE_SHIFT,143kfd_doorbell_process_slice(dev->kfd),144vma->vm_page_prot);145}146147148/* get kernel iomem pointer for a doorbell */149void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,150unsigned int *doorbell_off)151{152u32 inx;153154mutex_lock(&kfd->doorbell_mutex);155inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));156157__set_bit(inx, kfd->doorbell_bitmap);158mutex_unlock(&kfd->doorbell_mutex);159160if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)161return NULL;162163*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,164kfd->doorbells,165inx,166kfd->device_info.doorbell_size);167inx *= 2;168169pr_debug("Get kernel queue doorbell\n"170" doorbell offset == 0x%08X\n"171" doorbell index == 0x%x\n",172*doorbell_off, inx);173174return kfd->doorbell_kernel_ptr + inx;175}176177void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)178{179unsigned int inx;180181inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);182inx /= 2;183184mutex_lock(&kfd->doorbell_mutex);185__clear_bit(inx, kfd->doorbell_bitmap);186mutex_unlock(&kfd->doorbell_mutex);187}188189void write_kernel_doorbell(void __iomem *db, u32 value)190{191if (db) {192writel(value, db);193pr_debug("Writing %d to doorbell address %p\n", value, db);194}195}196197void write_kernel_doorbell64(void __iomem *db, u64 value)198{199if (db) {200WARN(((unsigned long)db & 7) != 0,201"Unaligned 64-bit doorbell");202writeq(value, (u64 __iomem *)db);203pr_debug("writing %llu to doorbell address %p\n", value, db);204}205}206207static int init_doorbell_bitmap(struct qcm_process_device *qpd,208struct kfd_dev *dev)209{210unsigned int i;211int range_start = dev->shared_resources.non_cp_doorbells_start;212int range_end = dev->shared_resources.non_cp_doorbells_end;213214if (!KFD_IS_SOC15(dev))215return 0;216217/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */218pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);219pr_debug("reserved doorbell 0x%03x - 0x%03x\n",220range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,221range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);222223for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {224if (i >= range_start && i <= range_end) {225__set_bit(i, qpd->doorbell_bitmap);226__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,227qpd->doorbell_bitmap);228}229}230231return 0;232}233234phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)235{236struct amdgpu_device *adev = pdd->dev->adev;237uint32_t first_db_index;238239if (!pdd->qpd.proc_doorbells) {240if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))241/* phys_addr_t 0 is error */242return 0;243}244245first_db_index = amdgpu_doorbell_index_on_bar(adev,246pdd->qpd.proc_doorbells,2470,248pdd->dev->kfd->device_info.doorbell_size);249return adev->doorbell.base + first_db_index * sizeof(uint32_t);250}251252int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)253{254int r;255struct qcm_process_device *qpd = &pdd->qpd;256257/* Allocate bitmap for dynamic doorbell allocation */258qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,259GFP_KERNEL);260if (!qpd->doorbell_bitmap) {261DRM_ERROR("Failed to allocate process doorbell bitmap\n");262return -ENOMEM;263}264265r = init_doorbell_bitmap(&pdd->qpd, kfd);266if (r) {267DRM_ERROR("Failed to initialize process doorbells\n");268r = -ENOMEM;269goto err;270}271272/* Allocate doorbells for this process */273r = amdgpu_bo_create_kernel(kfd->adev,274kfd_doorbell_process_slice(kfd),275PAGE_SIZE,276AMDGPU_GEM_DOMAIN_DOORBELL,277&qpd->proc_doorbells,278NULL,279NULL);280if (r) {281DRM_ERROR("Failed to allocate process doorbells\n");282goto err;283}284return 0;285286err:287bitmap_free(qpd->doorbell_bitmap);288qpd->doorbell_bitmap = NULL;289return r;290}291292void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)293{294struct qcm_process_device *qpd = &pdd->qpd;295296if (qpd->doorbell_bitmap) {297bitmap_free(qpd->doorbell_bitmap);298qpd->doorbell_bitmap = NULL;299}300301amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);302}303304305