Path: drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c

// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)

static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				int *over_subscription,
				int xnack_conflict)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = 0;

	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if (process_count > max_proc_per_quantum)
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
	if (gws_queue_count > 1)
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;

	if (*over_subscription)
		dev_dbg(dev, "Over subscribed runlist\n");

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

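/*
 * Illustrative sizing walk-through (hypothetical numbers; the real packet
 * sizes come from the ASIC-specific pm->pmf table): with 2 processes and
 * 8 active queues, map_process_size = 80 and map_queues_size = 64 give
 *
 *	*rlib_size = 2 * 80 + 8 * 64 = 672 bytes,
 *
 * plus pm->pmf->runlist_size for the chaining RUN_LIST packet if any
 * over-subscription bit was set above.
 */
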
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				int *is_over_subscription,
				int xnack_conflict)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
				xnack_conflict);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

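/*
 * Lifetime note (a summary of behaviour already in this file): the runlist
 * IB is sub-allocated from GTT under pm->lock and guarded by pm->allocated;
 * the matching free is pm_release_ib() below. On the pm_send_runlist()
 * failure paths the IB is released immediately, while on success it stays
 * allocated until the caller releases it when the runlist is unmapped.
 */
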
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	int is_over_subscription;
	int xnack_enabled = -1;
	bool xnack_conflict = 0;

	rl_wptr = retval = processes_mapped = 0;

	/* Check if processes set different xnack modes */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		if (xnack_enabled < 0)
			/* First process */
			xnack_enabled = qpd->pqm->process->xnack_enabled;
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
			/* Found a process with a different xnack mode */
			xnack_conflict = 1;
			break;
		}
	}

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription,
					xnack_conflict);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

build_runlist_ib:
	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* group processes with the same xnack mode together */
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
			continue;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						kq->queue,
						qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						q,
						qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}
	}
	if (xnack_conflict) {
		/* pick up processes with the other xnack mode */
		xnack_enabled = !xnack_enabled;
		xnack_conflict = 0;
		goto build_runlist_ib;
	}

	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
				 is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
				 " too many processes" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
				 " too many queues" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
				 " multiple processes using cooperative launch" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
				 " xnack on/off processes mixed on gfx9" : "");

		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					*rl_gpu_addr,
					alloc_size_bytes / sizeof(uint32_t),
					true);
	}
	pm->is_over_subscription = !!is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

void pm_uninit(struct packet_manager *pm)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
	pm->priv_queue = NULL;
}

int pm_send_set_resources(struct packet_manager *pm,
			struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
					size / sizeof(uint32_t),
					(unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

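/*
 * pm_send_set_resources() above is the template the remaining pm_send_*()
 * helpers follow: under pm->lock, reserve the packet's size in dwords on
 * the HIQ via kq_acquire_packet_buffer(), have the ASIC-specific pmf
 * callback build the PM4 packet in place, then kq_submit_packet() to hand
 * it to the hardware scheduler, or kq_rollback_packet() if building failed.
 */
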
0x%llX\n", rl_gpu_ib_addr);371372packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);373mutex_lock(&pm->lock);374375retval = kq_acquire_packet_buffer(pm->priv_queue,376packet_size_dwords, &rl_buffer);377if (retval)378goto fail_acquire_packet_buffer;379380retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,381rl_ib_size / sizeof(uint32_t), false);382if (retval)383goto fail_create_runlist;384385retval = kq_submit_packet(pm->priv_queue);386387mutex_unlock(&pm->lock);388389return retval;390391fail_create_runlist:392kq_rollback_packet(pm->priv_queue);393fail_acquire_packet_buffer:394mutex_unlock(&pm->lock);395fail_create_runlist_ib:396pm_release_ib(pm);397return retval;398}399400int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,401uint64_t fence_value)402{403struct kfd_node *node = pm->dqm->dev;404struct device *dev = node->adev->dev;405uint32_t *buffer, size;406int retval = 0;407408if (WARN_ON(!fence_address))409return -EFAULT;410411size = pm->pmf->query_status_size;412mutex_lock(&pm->lock);413kq_acquire_packet_buffer(pm->priv_queue,414size / sizeof(uint32_t), (unsigned int **)&buffer);415if (!buffer) {416dev_err(dev, "Failed to allocate buffer on kernel queue\n");417retval = -ENOMEM;418goto out;419}420421retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);422if (!retval)423retval = kq_submit_packet(pm->priv_queue);424else425kq_rollback_packet(pm->priv_queue);426427out:428mutex_unlock(&pm->lock);429return retval;430}431432/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts433* by writing to CP_IQ_WAIT_TIME2 registers.434*435* @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition436* @value: Depends on the cmd. This parameter is unused for437* KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. 
For438* KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set439*440*/441int pm_config_dequeue_wait_counts(struct packet_manager *pm,442enum kfd_config_dequeue_wait_counts_cmd cmd,443uint32_t value)444{445struct kfd_node *node = pm->dqm->dev;446struct device *dev = node->adev->dev;447int retval = 0;448uint32_t *buffer, size;449450if (!pm->pmf->config_dequeue_wait_counts ||451!pm->pmf->config_dequeue_wait_counts_size)452return 0;453454if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||455KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))456return 0;457458size = pm->pmf->config_dequeue_wait_counts_size;459460mutex_lock(&pm->lock);461462if (size) {463kq_acquire_packet_buffer(pm->priv_queue,464size / sizeof(uint32_t),465(unsigned int **)&buffer);466467if (!buffer) {468dev_err(dev,469"Failed to allocate buffer on kernel queue\n");470retval = -ENOMEM;471goto out;472}473474retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,475cmd, value);476if (!retval) {477retval = kq_submit_packet(pm->priv_queue);478479/* If default value is modified, cache that in dqm->wait_times */480if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)481update_dqm_wait_times(pm->dqm);482} else {483kq_rollback_packet(pm->priv_queue);484}485}486out:487mutex_unlock(&pm->lock);488return retval;489}490491int pm_send_unmap_queue(struct packet_manager *pm,492enum kfd_unmap_queues_filter filter,493uint32_t filter_param, bool reset)494{495struct kfd_node *node = pm->dqm->dev;496struct device *dev = node->adev->dev;497uint32_t *buffer, size;498int retval = 0;499500size = pm->pmf->unmap_queues_size;501mutex_lock(&pm->lock);502kq_acquire_packet_buffer(pm->priv_queue,503size / sizeof(uint32_t), (unsigned int **)&buffer);504if (!buffer) {505dev_err(dev, "Failed to allocate buffer on kernel queue\n");506retval = -ENOMEM;507goto out;508}509510retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);511if (!retval)512retval = kq_submit_packet(pm->priv_queue);513else514kq_rollback_packet(pm->priv_queue);515516out:517mutex_unlock(&pm->lock);518return retval;519}520521void pm_release_ib(struct packet_manager *pm)522{523mutex_lock(&pm->lock);524if (pm->allocated) {525kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);526pm->allocated = false;527}528mutex_unlock(&pm->lock);529}530531#if defined(CONFIG_DEBUG_FS)532533int pm_debugfs_runlist(struct seq_file *m, void *data)534{535struct packet_manager *pm = data;536537mutex_lock(&pm->lock);538539if (!pm->allocated) {540seq_puts(m, " No active runlist\n");541goto out;542}543544seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,545pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);546547out:548mutex_unlock(&pm->lock);549return 0;550}551552int pm_debugfs_hang_hws(struct packet_manager *pm)553{554struct kfd_node *node = pm->dqm->dev;555struct device *dev = node->adev->dev;556uint32_t *buffer, size;557int r = 0;558559if (!pm->priv_queue)560return -EAGAIN;561562size = pm->pmf->query_status_size;563mutex_lock(&pm->lock);564kq_acquire_packet_buffer(pm->priv_queue,565size / sizeof(uint32_t), (unsigned int **)&buffer);566if (!buffer) {567dev_err(dev, "Failed to allocate buffer on kernel queue\n");568r = -ENOMEM;569goto out;570}571memset(buffer, 0x55, size);572kq_submit_packet(pm->priv_queue);573574dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",575buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],576buffer[5], buffer[6]);577out:578mutex_unlock(&pm->lock);579return r;580}581582583#endif584585586