Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <[email protected]>
 */

#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include <drm/ttm/ttm_tt.h>

#include "amdgpu_cs.h"
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"

static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
				 struct amdgpu_device *adev,
				 struct drm_file *filp,
				 union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	if (cs->in.num_chunks == 0)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->adev = adev;
	p->filp = filp;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx)
		return -EINVAL;

	if (atomic_read(&p->ctx->guilty)) {
		amdgpu_ctx_put(p->ctx);
		return -ECANCELED;
	}

	amdgpu_sync_create(&p->sync);
	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	return 0;
}

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
		return -EINVAL;

	++(num_ibs[r]);
	p->gang_leader_idx = r;
	return 0;
}
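/*
 * The FENCE chunk names a GEM handle plus a byte offset where the user fence
 * value for this submission is written. The checks below require the BO to be
 * exactly one page, reject userptr-backed BOs, and the offset is later
 * combined with the BO's GPU address when the buffer list is processed.
 */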
static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_cs_chunk_fence *data,
				   uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	size = amdgpu_bo_size(p->uf_bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}

static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_bo_list_in *data)
{
	struct drm_amdgpu_bo_list_entry *info;
	int r;

	r = amdgpu_bo_create_list_entry_array(data, &info);
	if (r)
		return r;

	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
				  &p->bo_list);
	if (r)
		goto error_free;

	kvfree(info);
	return 0;

error_free:
	kvfree(info);

	return r;
}

/* Copy the data from userspace and go over it the first time */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	uint32_t uf_offset = 0;
	size_t size;
	int ret;
	int i;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
				     GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				   GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
						    GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		/* Assume the worst on the following checks */
		ret = -EINVAL;
		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
						      &uf_offset);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			if (size < sizeof(struct drm_amdgpu_bo_list_in))
				goto free_partial_kdata;

			/* Only a single BO list is allowed to simplify handling. */
			if (p->bo_list)
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			break;

		default:
			goto free_partial_kdata;
		}
	}

	if (!p->gang_size) {
		ret = -EINVAL;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
				       num_ibs[i], &p->jobs[i],
				       p->filp->client_id);
		if (ret)
			goto free_all_kdata;
		switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
		case AMDGPU_ENFORCE_ISOLATION_DISABLE:
		default:
			p->jobs[i]->enforce_isolation = false;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		case AMDGPU_ENFORCE_ISOLATION_ENABLE:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = true;
			break;
		case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
			p->jobs[i]->enforce_isolation = true;
			p->jobs[i]->run_cleaner_shader = false;
			break;
		}
	}
	p->gang_leader = p->jobs[p->gang_leader_idx];

	if (p->ctx->generation != p->gang_leader->generation) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_bo)
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}

static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

	/* submissions to kernel queues are disabled */
	if (ring->no_user_submission)
		return -EINVAL;

	/* MM engine doesn't support user fences */
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
			(*ce_preempt)++;
		else
			(*de_preempt)++;

		/* Each GFX command submit allows only 1 IB max
		 * preemptible for CE & DE */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

	r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			  chunk_ib->ib_bytes : 0,
			  AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}

	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;
	return 0;
}
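/*
 * Dependency chunks reference fences of other submissions by context, ring
 * and sequence number. Each one is looked up and added to the parser's sync
 * object so this submission waits for it. For SCHEDULED_DEPENDENCIES only the
 * scheduled fence is taken, i.e. it is enough that the dependency has been
 * picked up by the scheduler rather than fully finished.
 */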
static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
	dma_fence_put(fence);
	return r;
}

static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
				   struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
					      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
						  syncobj_deps[i].point,
						  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	if (p->post_deps)
		return -EINVAL;
	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
			       struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
	int i;

	if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		p->jobs[i]->shadow_va = shadow->shadow_va;
		p->jobs[i]->csa_va = shadow->csa_va;
		p->jobs[i]->gds_va = shadow->gds_va;
		p->jobs[i]->init_shadow =
			shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
	}

	return 0;
}

static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			r = amdgpu_cs_p2_shadow(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}

/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
		return 0;

	/* Since accum_us is incremented by a million per second, just
	 * multiply it by the number of MB/s to get the number of bytes.
	 */
	return us << adev->mm_stats.log2_max_MBps;
}

static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
	if (!adev->mm_stats.log2_max_MBps)
		return 0;

	return bytes >> adev->mm_stats.log2_max_MBps;
}
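/*
 * Worked example (illustrative value): with log2_max_MBps = 10, i.e. roughly
 * 1024 MB/s of measured bandwidth, one second of accumulated time is
 * 1,000,000 us, and us_to_bytes() yields 1,000,000 << 10, about 1 GB of
 * allowed buffer moves, matching the "multiply us by MB/s" rule above.
 */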
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned.
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}
/* Report how many bytes have really been moved for the last command
 * submission. This can result in a debt that can stop buffer migrations
 * temporarily.
 */
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
				  u64 num_vis_bytes)
{
	spin_lock(&adev->mm_stats.lock);
	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
	spin_unlock(&adev->mm_stats.lock);
}

static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
		p->bytes_moved_vis += ctx.bytes_moved;

	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}
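/*
 * Lock and validate all buffers used by the submission: the BO list (from the
 * ioctl handle or the BO_HANDLES chunk), the VM page directory and the
 * optional user fence BO are reserved with drm_exec, userptr pages are
 * (re)acquired, and every BO is validated against the move budget computed
 * above.
 */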
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *obj;
	unsigned long index;
	unsigned int i;
	int r;

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create an empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	mutex_lock(&p->bo_list->bo_list_mutex);

	/* Get userptr backing pages. If pages are updated after being
	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
	 * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		bool userpage_invalidated = false;
		struct amdgpu_bo *bo = e->bo;
		int i;

		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
					 sizeof(struct page *),
					 GFP_KERNEL);
		if (!e->user_pages) {
			DRM_ERROR("kvmalloc_array failure\n");
			r = -ENOMEM;
			goto out_free_user_pages;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			goto out_free_user_pages;
		}

		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	drm_exec_until_all_locked(&p->exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
		drm_exec_retry_on_contention(&p->exec);
		if (unlikely(r))
			goto out_free_user_pages;

		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* One fence for TTM and one for each CS job */
			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;

			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
		}

		if (p->uf_bo) {
			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;
		}
	}

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct mm_struct *usermm;

		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
		if (usermm && usermm != current->mm) {
			r = -EPERM;
			goto out_free_user_pages;
		}

		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
		    e->user_invalidated && e->user_pages) {
			amdgpu_bo_placement_from_domain(e->bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
					    &ctx);
			if (r)
				goto out_free_user_pages;

			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
						     e->user_pages);
		}

		kvfree(e->user_pages);
		e->user_pages = NULL;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
			       amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate() failed.\n");
		goto out_free_user_pages;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
		if (unlikely(r))
			goto out_free_user_pages;
	}

	if (p->uf_bo) {
		r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
		if (unlikely(r))
			goto out_free_user_pages;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

out_free_user_pages:
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->bo;

		if (!e->user_pages)
			continue;
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
		kvfree(e->user_pages);
		e->user_pages = NULL;
		e->range = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}
amdgpu_cs_parser *p)1028{1029int i, j;10301031if (!trace_amdgpu_cs_enabled())1032return;10331034for (i = 0; i < p->gang_size; ++i) {1035struct amdgpu_job *job = p->jobs[i];10361037for (j = 0; j < job->num_ibs; ++j)1038trace_amdgpu_cs(p, job, &job->ibs[j]);1039}1040}10411042static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,1043struct amdgpu_job *job)1044{1045struct amdgpu_ring *ring = amdgpu_job_ring(job);1046unsigned int i;1047int r;10481049/* Only for UVD/VCE VM emulation */1050if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)1051return 0;10521053for (i = 0; i < job->num_ibs; ++i) {1054struct amdgpu_ib *ib = &job->ibs[i];1055struct amdgpu_bo_va_mapping *m;1056struct amdgpu_bo *aobj;1057uint64_t va_start;1058uint8_t *kptr;10591060va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;1061r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);1062if (r) {1063DRM_ERROR("IB va_start is invalid\n");1064return r;1065}10661067if ((va_start + ib->length_dw * 4) >1068(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {1069DRM_ERROR("IB va_start+ib_bytes is invalid\n");1070return -EINVAL;1071}10721073/* the IB should be reserved at this point */1074r = amdgpu_bo_kmap(aobj, (void **)&kptr);1075if (r)1076return r;10771078kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);10791080if (ring->funcs->parse_cs) {1081memcpy(ib->ptr, kptr, ib->length_dw * 4);1082amdgpu_bo_kunmap(aobj);10831084r = amdgpu_ring_parse_cs(ring, p, job, ib);1085if (r)1086return r;10871088if (ib->sa_bo)1089ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);1090} else {1091ib->ptr = (uint32_t *)kptr;1092r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);1093amdgpu_bo_kunmap(aobj);1094if (r)1095return r;1096}1097}10981099return 0;1100}11011102static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)1103{1104unsigned int i;1105int r;11061107for (i = 0; i < p->gang_size; ++i) {1108r = amdgpu_cs_patch_ibs(p, p->jobs[i]);1109if (r)1110return r;1111}1112return 0;1113}11141115static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)1116{1117struct amdgpu_fpriv *fpriv = p->filp->driver_priv;1118struct amdgpu_job *job = p->gang_leader;1119struct amdgpu_device *adev = p->adev;1120struct amdgpu_vm *vm = &fpriv->vm;1121struct amdgpu_bo_list_entry *e;1122struct amdgpu_bo_va *bo_va;1123unsigned int i;1124int r;11251126/*1127* We can't use gang submit on with reserved VMIDs when the VM changes1128* can't be invalidated by more than one engine at the same time.1129*/1130if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {1131for (i = 0; i < p->gang_size; ++i) {1132struct drm_sched_entity *entity = p->entities[i];1133struct drm_gpu_scheduler *sched = entity->rq->sched;1134struct amdgpu_ring *ring = to_amdgpu_ring(sched);11351136if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))1137return -EINVAL;1138}1139}11401141if (!amdgpu_vm_ready(vm))1142return -EINVAL;11431144r = amdgpu_vm_clear_freed(adev, vm, NULL);1145if (r)1146return r;11471148r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);1149if (r)1150return r;11511152r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,1153GFP_KERNEL);1154if (r)1155return r;11561157if (fpriv->csa_va) {1158bo_va = fpriv->csa_va;1159BUG_ON(!bo_va);1160r = amdgpu_vm_bo_update(adev, bo_va, false);1161if (r)1162return r;11631164r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,1165GFP_KERNEL);1166if (r)1167return r;1168}11691170/* FIXME: In theory this loop shouldn't be needed any more when1171* amdgpu_vm_handle_moved handles all moved BOs that are reserved1172* with p->ticket. 
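/*
 * Bring the VM up to date for this submission: clear freed mappings, update
 * the BO VAs of everything in the BO list, handle moved BOs and the page
 * directories, and add the resulting page table update fences to the sync
 * object so the jobs only run once the VM updates have completed.
 */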
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	/*
	 * We can't use gang submit with reserved VMIDs when the VM changes
	 * can't be invalidated by more than one engine at the same time.
	 */
	if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
		for (i = 0; i < p->gang_size; ++i) {
			struct drm_sched_entity *entity = p->entities[i];
			struct drm_gpu_scheduler *sched = entity->rq->sched;
			struct amdgpu_ring *ring = to_amdgpu_ring(sched);

			if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
				return -EINVAL;
		}
	}

	if (!amdgpu_vm_ready(vm))
		return -EINVAL;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
			      GFP_KERNEL);
	if (r)
		return r;

	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	/* FIXME: In theory this loop shouldn't be needed any more when
	 * amdgpu_vm_handle_moved handles all moved BOs that are reserved
	 * with p->ticket. But removing it caused test regressions, so I'm
	 * leaving it here for now.
	 */
	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
	if (r)
		return r;

	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (adev->debug_vm) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->bo;

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(bo, false);
		}
	}

	return 0;
}

static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_gpu_scheduler *sched;
	struct drm_gem_object *obj;
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;
	int r;

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		return r;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
		if (r)
			return r;
	}

	sched = p->gang_leader->base.entity->rq->sched;
	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

		/*
		 * When we have a dependency it might be necessary to insert a
		 * pipeline sync to make sure that all caches etc are flushed and the
		 * next job actually sees the results from the previous one
		 * before we start executing on the same scheduler ring.
		 */
		if (!s_fence || s_fence->sched != sched) {
			dma_fence_put(fence);
			continue;
		}

		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
				      GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			p->post_deps[i].chain = NULL;
		} else {
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}
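/*
 * Final step of the ioctl: arm the scheduler jobs, make every gang member's
 * scheduled fence a dependency of the gang leader, re-check the userptr pages
 * under the notifier lock, attach the fences to all reserved BOs and push the
 * jobs to their scheduler entities.
 */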
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *gobj;
	unsigned long index;
	unsigned int i;
	uint64_t seq;
	int r;

	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	for (i = 0; i < p->gang_size; ++i) {
		struct dma_fence *fence;

		if (p->jobs[i] == leader)
			continue;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&leader->base, fence);
		if (r) {
			dma_fence_put(fence);
			return r;
		}
	}

	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptrs are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
							e->range);
		e->range = NULL;
	}
	if (r) {
		r = -EAGAIN;
		mutex_unlock(&p->adev->notifier_lock);
		return r;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	drm_exec_for_each_locked_object(&p->exec, index, gobj) {

		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < p->gang_size; ++i) {
			if (p->jobs[i] == leader)
				continue;

			dma_resv_add_fence(gobj->resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

		/* The gang leader is remembered as writer */
		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
	}

	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	leader->uf_sequence = seq;

	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return 0;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned int i;

	amdgpu_sync_free(&parser->sync);
	drm_exec_fini(&parser->exec);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	amdgpu_bo_unref(&parser->uf_bo);
}
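/**
 * amdgpu_cs_ioctl - process a command submission from userspace
 *
 * @dev: drm device
 * @data: data from userspace (union drm_amdgpu_cs)
 * @filp: file private
 *
 * Runs the two parsing passes over the chunks, reserves and validates the
 * buffers, handles VM updates and ring synchronization, and finally submits
 * the jobs to the scheduler.
 */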
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	int r;

	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	r = amdgpu_cs_parser_init(&parser, adev, filp, data);
	if (r) {
		DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r);
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto error_fini;

	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_DEBUG("Failed to process the buffer list %d!\n", r);
		goto error_fini;
	}

	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_submit(&parser, data);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	long r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
				  wait->in.ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return r;
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;
		dma_fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
 *
 * @adev: amdgpu device
 * @filp: file private
 * @user: drm_amdgpu_fence copied from user space
 */
static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
					     struct drm_file *filp,
					     struct drm_amdgpu_fence *user)
{
	struct drm_sched_entity *entity;
	struct amdgpu_ctx *ctx;
	struct dma_fence *fence;
	int r;

	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
	if (ctx == NULL)
		return ERR_PTR(-EINVAL);

	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
				  user->ring, &entity);
	if (r) {
		amdgpu_ctx_put(ctx);
		return ERR_PTR(r);
	}

	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
	amdgpu_ctx_put(ctx);

	return fence;
}
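/**
 * amdgpu_cs_fence_to_handle_ioctl - convert a CS fence into a handle
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Exports the fence of a previous command submission as a syncobj handle, a
 * syncobj file descriptor or a sync_file file descriptor, depending on
 * info->in.what.
 */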
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	if (!fence)
		fence = dma_fence_get_stub();

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		dma_fence_put(fence);
		return -EINVAL;
	}
}

/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
				     struct drm_file *filp,
				     union drm_amdgpu_wait_fences *wait,
				     struct drm_amdgpu_fence *fences)
{
	uint32_t fence_count = wait->in.fence_count;
	unsigned int i;
	long r = 1;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;
		unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			continue;

		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;

		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;
	}

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);

	return 0;
}

/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;
err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}

/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 */
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_wait_fences *wait = data;
	uint32_t fence_count = wait->in.fence_count;
	struct drm_amdgpu_fence *fences_user;
	struct drm_amdgpu_fence *fences;
	int r;

	/* Get the fences from userspace */
	fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
			       GFP_KERNEL);
	if (fences == NULL)
		return -ENOMEM;

	fences_user = u64_to_user_ptr(wait->in.fences);
	if (copy_from_user(fences, fences_user,
			   sizeof(struct drm_amdgpu_fence) * fence_count)) {
		r = -EFAULT;
		goto err_free_fences;
	}

	if (wait->in.wait_all)
		r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
	else
		r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);

err_free_fences:
	kfree(fences);

	return r;
}

/**
 * amdgpu_cs_find_mapping - find bo_va for VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 * @map: Placeholder to return found BO mapping
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns allocation structure when found, NULL
 * otherwise.
 */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int i, r;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
		return -EINVAL;

	/* Make sure VRAM is allocated contiguously */
	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
	    !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {

		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		for (i = 0; i < (*bo)->placement.num_placement; i++)
			(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}