Path: blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
49634 views
/*1* Copyright 2008 Jerome Glisse.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR19* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,20* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER21* DEALINGS IN THE SOFTWARE.22*23* Authors:24* Jerome Glisse <[email protected]>25*/2627#include <linux/file.h>28#include <linux/pagemap.h>29#include <linux/sync_file.h>30#include <linux/dma-buf.h>3132#include <drm/amdgpu_drm.h>33#include <drm/drm_syncobj.h>34#include <drm/ttm/ttm_tt.h>3536#include "amdgpu_cs.h"37#include "amdgpu.h"38#include "amdgpu_trace.h"39#include "amdgpu_gmc.h"40#include "amdgpu_gem.h"41#include "amdgpu_ras.h"42#include "amdgpu_hmm.h"4344static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,45struct amdgpu_device *adev,46struct drm_file *filp,47union drm_amdgpu_cs *cs)48{49struct amdgpu_fpriv *fpriv = filp->driver_priv;5051if (cs->in.num_chunks == 0)52return -EINVAL;5354memset(p, 0, sizeof(*p));55p->adev = adev;56p->filp = filp;5758p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);59if (!p->ctx)60return -EINVAL;6162if (atomic_read(&p->ctx->guilty)) {63amdgpu_ctx_put(p->ctx);64return -ECANCELED;65}6667amdgpu_sync_create(&p->sync);68drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |69DRM_EXEC_IGNORE_DUPLICATES, 0);70return 0;71}7273static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,74struct drm_amdgpu_cs_chunk_ib *chunk_ib)75{76struct drm_sched_entity *entity;77unsigned int i;78int r;7980r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,81chunk_ib->ip_instance,82chunk_ib->ring, &entity);83if (r)84return r;8586/*87* Abort if there is no run queue associated with this entity.88* Possibly because of disabled HW IP.89*/90if (entity->rq == NULL)91return -EINVAL;9293/* Check if we can add this IB to some existing job */94for (i = 0; i < p->gang_size; ++i)95if (p->entities[i] == entity)96return i;9798/* If not increase the gang size if possible */99if (i == AMDGPU_CS_GANG_SIZE)100return -EINVAL;101102p->entities[i] = entity;103p->gang_size = i + 1;104return i;105}106107static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,108struct drm_amdgpu_cs_chunk_ib *chunk_ib,109unsigned int *num_ibs)110{111int r;112113r = amdgpu_cs_job_idx(p, chunk_ib);114if (r < 0)115return r;116117if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))118return -EINVAL;119120++(num_ibs[r]);121p->gang_leader_idx = r;122return 0;123}124125static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,126struct drm_amdgpu_cs_chunk_fence *data,127uint32_t *offset)128{129struct drm_gem_object *gobj;130unsigned long size;131132gobj = drm_gem_object_lookup(p->filp, data->handle);133if (gobj == NULL)134return -EINVAL;135136p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));137drm_gem_object_put(gobj);138139size = amdgpu_bo_size(p->uf_bo);140if (size != PAGE_SIZE || data->offset > (size - 8))141return -EINVAL;142143if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))144return -EINVAL;145146*offset = data->offset;147return 0;148}149150static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,151struct drm_amdgpu_bo_list_in *data)152{153struct drm_amdgpu_bo_list_entry *info;154int r;155156r = amdgpu_bo_create_list_entry_array(data, &info);157if (r)158return r;159160r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,161&p->bo_list);162if (r)163goto error_free;164165kvfree(info);166return 0;167168error_free:169kvfree(info);170171return r;172}173174/* Copy the data from userspace and go over it the first time */175static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,176union drm_amdgpu_cs *cs)177{178struct amdgpu_fpriv *fpriv = p->filp->driver_priv;179unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };180struct amdgpu_vm *vm = &fpriv->vm;181uint64_t *chunk_array;182uint32_t uf_offset = 0;183size_t size;184int ret;185int i;186187chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),188cs->in.num_chunks,189sizeof(uint64_t));190if (IS_ERR(chunk_array))191return PTR_ERR(chunk_array);192193p->nchunks = cs->in.num_chunks;194p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),195GFP_KERNEL);196if (!p->chunks) {197ret = -ENOMEM;198goto free_chunk;199}200201for (i = 0; i < p->nchunks; i++) {202struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;203struct drm_amdgpu_cs_chunk user_chunk;204205chunk_ptr = u64_to_user_ptr(chunk_array[i]);206if (copy_from_user(&user_chunk, chunk_ptr,207sizeof(struct drm_amdgpu_cs_chunk))) {208ret = -EFAULT;209i--;210goto free_partial_kdata;211}212p->chunks[i].chunk_id = user_chunk.chunk_id;213p->chunks[i].length_dw = user_chunk.length_dw;214215size = p->chunks[i].length_dw;216217p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),218size,219sizeof(uint32_t));220if (IS_ERR(p->chunks[i].kdata)) {221ret = PTR_ERR(p->chunks[i].kdata);222i--;223goto free_partial_kdata;224}225size *= sizeof(uint32_t);226227/* Assume the worst on the following checks */228ret = -EINVAL;229switch (p->chunks[i].chunk_id) {230case AMDGPU_CHUNK_ID_IB:231if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))232goto free_partial_kdata;233234ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);235if (ret)236goto free_partial_kdata;237break;238239case AMDGPU_CHUNK_ID_FENCE:240if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))241goto free_partial_kdata;242243ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,244&uf_offset);245if (ret)246goto free_partial_kdata;247break;248249case AMDGPU_CHUNK_ID_BO_HANDLES:250if (size < sizeof(struct drm_amdgpu_bo_list_in))251goto free_partial_kdata;252253/* Only a single BO list is allowed to simplify handling. */254if (p->bo_list)255goto free_partial_kdata;256257ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);258if (ret)259goto free_partial_kdata;260break;261262case AMDGPU_CHUNK_ID_DEPENDENCIES:263case AMDGPU_CHUNK_ID_SYNCOBJ_IN:264case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:265case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:266case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:267case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:268case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:269break;270271default:272goto free_partial_kdata;273}274}275276if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {277ret = -EINVAL;278goto free_all_kdata;279}280281for (i = 0; i < p->gang_size; ++i) {282ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,283num_ibs[i], &p->jobs[i],284p->filp->client_id);285if (ret)286goto free_all_kdata;287switch (p->adev->enforce_isolation[fpriv->xcp_id]) {288case AMDGPU_ENFORCE_ISOLATION_DISABLE:289default:290p->jobs[i]->enforce_isolation = false;291p->jobs[i]->run_cleaner_shader = false;292break;293case AMDGPU_ENFORCE_ISOLATION_ENABLE:294p->jobs[i]->enforce_isolation = true;295p->jobs[i]->run_cleaner_shader = true;296break;297case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:298p->jobs[i]->enforce_isolation = true;299p->jobs[i]->run_cleaner_shader = false;300break;301case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:302p->jobs[i]->enforce_isolation = true;303p->jobs[i]->run_cleaner_shader = false;304break;305}306}307p->gang_leader = p->jobs[p->gang_leader_idx];308309if (p->ctx->generation != p->gang_leader->generation) {310ret = -ECANCELED;311goto free_all_kdata;312}313314if (p->uf_bo)315p->gang_leader->uf_addr = uf_offset;316kvfree(chunk_array);317318/* Use this opportunity to fill in task info for the vm */319amdgpu_vm_set_task_info(vm);320321return 0;322323free_all_kdata:324i = p->nchunks - 1;325free_partial_kdata:326for (; i >= 0; i--)327kvfree(p->chunks[i].kdata);328kvfree(p->chunks);329p->chunks = NULL;330p->nchunks = 0;331free_chunk:332kvfree(chunk_array);333334return ret;335}336337static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,338struct amdgpu_cs_chunk *chunk,339unsigned int *ce_preempt,340unsigned int *de_preempt)341{342struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;343struct amdgpu_fpriv *fpriv = p->filp->driver_priv;344struct amdgpu_vm *vm = &fpriv->vm;345struct amdgpu_ring *ring;346struct amdgpu_job *job;347struct amdgpu_ib *ib;348int r;349350r = amdgpu_cs_job_idx(p, chunk_ib);351if (r < 0)352return r;353354job = p->jobs[r];355ring = amdgpu_job_ring(job);356ib = &job->ibs[job->num_ibs++];357358/* submissions to kernel queues are disabled */359if (ring->no_user_submission)360return -EINVAL;361362/* MM engine doesn't support user fences */363if (p->uf_bo && ring->funcs->no_user_fence)364return -EINVAL;365366if (!p->adev->debug_enable_ce_cs &&367chunk_ib->flags & AMDGPU_IB_FLAG_CE) {368dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");369return -EINVAL;370}371372if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&373chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {374if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)375(*ce_preempt)++;376else377(*de_preempt)++;378379/* Each GFX command submit allows only 1 IB max380* preemptible for CE & DE */381if (*ce_preempt > 1 || *de_preempt > 1)382return -EINVAL;383}384385if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)386job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;387388r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?389chunk_ib->ib_bytes : 0,390AMDGPU_IB_POOL_DELAYED, ib);391if (r) {392drm_err(adev_to_drm(p->adev), "Failed to get ib !\n");393return r;394}395396ib->gpu_addr = chunk_ib->va_start;397ib->length_dw = chunk_ib->ib_bytes / 4;398ib->flags = chunk_ib->flags;399return 0;400}401402static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,403struct amdgpu_cs_chunk *chunk)404{405struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;406struct amdgpu_fpriv *fpriv = p->filp->driver_priv;407unsigned int num_deps;408int i, r;409410num_deps = chunk->length_dw * 4 /411sizeof(struct drm_amdgpu_cs_chunk_dep);412413for (i = 0; i < num_deps; ++i) {414struct amdgpu_ctx *ctx;415struct drm_sched_entity *entity;416struct dma_fence *fence;417418ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);419if (ctx == NULL)420return -EINVAL;421422r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,423deps[i].ip_instance,424deps[i].ring, &entity);425if (r) {426amdgpu_ctx_put(ctx);427return r;428}429430fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);431amdgpu_ctx_put(ctx);432433if (IS_ERR(fence))434return PTR_ERR(fence);435else if (!fence)436continue;437438if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {439struct drm_sched_fence *s_fence;440struct dma_fence *old = fence;441442s_fence = to_drm_sched_fence(fence);443fence = dma_fence_get(&s_fence->scheduled);444dma_fence_put(old);445}446447r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);448dma_fence_put(fence);449if (r)450return r;451}452return 0;453}454455static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,456uint32_t handle, u64 point,457u64 flags)458{459struct dma_fence *fence;460int r;461462r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);463if (r) {464drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",465handle, point, r);466return r;467}468469r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);470dma_fence_put(fence);471return r;472}473474static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,475struct amdgpu_cs_chunk *chunk)476{477struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;478unsigned int num_deps;479int i, r;480481num_deps = chunk->length_dw * 4 /482sizeof(struct drm_amdgpu_cs_chunk_sem);483for (i = 0; i < num_deps; ++i) {484r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);485if (r)486return r;487}488489return 0;490}491492static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,493struct amdgpu_cs_chunk *chunk)494{495struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;496unsigned int num_deps;497int i, r;498499num_deps = chunk->length_dw * 4 /500sizeof(struct drm_amdgpu_cs_chunk_syncobj);501for (i = 0; i < num_deps; ++i) {502r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,503syncobj_deps[i].point,504syncobj_deps[i].flags);505if (r)506return r;507}508509return 0;510}511512static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,513struct amdgpu_cs_chunk *chunk)514{515struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;516unsigned int num_deps;517int i;518519num_deps = chunk->length_dw * 4 /520sizeof(struct drm_amdgpu_cs_chunk_sem);521522if (p->post_deps)523return -EINVAL;524525p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),526GFP_KERNEL);527p->num_post_deps = 0;528529if (!p->post_deps)530return -ENOMEM;531532533for (i = 0; i < num_deps; ++i) {534p->post_deps[i].syncobj =535drm_syncobj_find(p->filp, deps[i].handle);536if (!p->post_deps[i].syncobj)537return -EINVAL;538p->post_deps[i].chain = NULL;539p->post_deps[i].point = 0;540p->num_post_deps++;541}542543return 0;544}545546static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,547struct amdgpu_cs_chunk *chunk)548{549struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;550unsigned int num_deps;551int i;552553num_deps = chunk->length_dw * 4 /554sizeof(struct drm_amdgpu_cs_chunk_syncobj);555556if (p->post_deps)557return -EINVAL;558559p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),560GFP_KERNEL);561p->num_post_deps = 0;562563if (!p->post_deps)564return -ENOMEM;565566for (i = 0; i < num_deps; ++i) {567struct amdgpu_cs_post_dep *dep = &p->post_deps[i];568569dep->chain = NULL;570if (syncobj_deps[i].point) {571dep->chain = dma_fence_chain_alloc();572if (!dep->chain)573return -ENOMEM;574}575576dep->syncobj = drm_syncobj_find(p->filp,577syncobj_deps[i].handle);578if (!dep->syncobj) {579dma_fence_chain_free(dep->chain);580return -EINVAL;581}582dep->point = syncobj_deps[i].point;583p->num_post_deps++;584}585586return 0;587}588589static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,590struct amdgpu_cs_chunk *chunk)591{592struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;593int i;594595if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)596return -EINVAL;597598for (i = 0; i < p->gang_size; ++i) {599p->jobs[i]->shadow_va = shadow->shadow_va;600p->jobs[i]->csa_va = shadow->csa_va;601p->jobs[i]->gds_va = shadow->gds_va;602p->jobs[i]->init_shadow =603shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;604}605606return 0;607}608609static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)610{611unsigned int ce_preempt = 0, de_preempt = 0;612int i, r;613614for (i = 0; i < p->nchunks; ++i) {615struct amdgpu_cs_chunk *chunk;616617chunk = &p->chunks[i];618619switch (chunk->chunk_id) {620case AMDGPU_CHUNK_ID_IB:621r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);622if (r)623return r;624break;625case AMDGPU_CHUNK_ID_DEPENDENCIES:626case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:627r = amdgpu_cs_p2_dependencies(p, chunk);628if (r)629return r;630break;631case AMDGPU_CHUNK_ID_SYNCOBJ_IN:632r = amdgpu_cs_p2_syncobj_in(p, chunk);633if (r)634return r;635break;636case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:637r = amdgpu_cs_p2_syncobj_out(p, chunk);638if (r)639return r;640break;641case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:642r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);643if (r)644return r;645break;646case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:647r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);648if (r)649return r;650break;651case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:652r = amdgpu_cs_p2_shadow(p, chunk);653if (r)654return r;655break;656}657}658659return 0;660}661662/* Convert microseconds to bytes. */663static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)664{665if (us <= 0 || !adev->mm_stats.log2_max_MBps)666return 0;667668/* Since accum_us is incremented by a million per second, just669* multiply it by the number of MB/s to get the number of bytes.670*/671return us << adev->mm_stats.log2_max_MBps;672}673674static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)675{676if (!adev->mm_stats.log2_max_MBps)677return 0;678679return bytes >> adev->mm_stats.log2_max_MBps;680}681682/* Returns how many bytes TTM can move right now. If no bytes can be moved,683* it returns 0. If it returns non-zero, it's OK to move at least one buffer,684* which means it can go over the threshold once. If that happens, the driver685* will be in debt and no other buffer migrations can be done until that debt686* is repaid.687*688* This approach allows moving a buffer of any size (it's important to allow689* that).690*691* The currency is simply time in microseconds and it increases as the clock692* ticks. The accumulated microseconds (us) are converted to bytes and693* returned.694*/695static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,696u64 *max_bytes,697u64 *max_vis_bytes)698{699s64 time_us, increment_us;700u64 free_vram, total_vram, used_vram;701/* Allow a maximum of 200 accumulated ms. This is basically per-IB702* throttling.703*704* It means that in order to get full max MBps, at least 5 IBs per705* second must be submitted and not more than 200ms apart from each706* other.707*/708const s64 us_upper_bound = 200000;709710if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {711*max_bytes = 0;712*max_vis_bytes = 0;713return;714}715716total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);717used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);718free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;719720spin_lock(&adev->mm_stats.lock);721722/* Increase the amount of accumulated us. */723time_us = ktime_to_us(ktime_get());724increment_us = time_us - adev->mm_stats.last_update_us;725adev->mm_stats.last_update_us = time_us;726adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,727us_upper_bound);728729/* This prevents the short period of low performance when the VRAM730* usage is low and the driver is in debt or doesn't have enough731* accumulated us to fill VRAM quickly.732*733* The situation can occur in these cases:734* - a lot of VRAM is freed by userspace735* - the presence of a big buffer causes a lot of evictions736* (solution: split buffers into smaller ones)737*738* If 128 MB or 1/8th of VRAM is free, start filling it now by setting739* accum_us to a positive number.740*/741if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {742s64 min_us;743744/* Be more aggressive on dGPUs. Try to fill a portion of free745* VRAM now.746*/747if (!(adev->flags & AMD_IS_APU))748min_us = bytes_to_us(adev, free_vram / 4);749else750min_us = 0; /* Reset accum_us on APUs. */751752adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);753}754755/* This is set to 0 if the driver is in debt to disallow (optional)756* buffer moves.757*/758*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);759760/* Do the same for visible VRAM if half of it is free */761if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {762u64 total_vis_vram = adev->gmc.visible_vram_size;763u64 used_vis_vram =764amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);765766if (used_vis_vram < total_vis_vram) {767u64 free_vis_vram = total_vis_vram - used_vis_vram;768769adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +770increment_us, us_upper_bound);771772if (free_vis_vram >= total_vis_vram / 2)773adev->mm_stats.accum_us_vis =774max(bytes_to_us(adev, free_vis_vram / 2),775adev->mm_stats.accum_us_vis);776}777778*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);779} else {780*max_vis_bytes = 0;781}782783spin_unlock(&adev->mm_stats.lock);784}785786/* Report how many bytes have really been moved for the last command787* submission. This can result in a debt that can stop buffer migrations788* temporarily.789*/790void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,791u64 num_vis_bytes)792{793spin_lock(&adev->mm_stats.lock);794adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);795adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);796spin_unlock(&adev->mm_stats.lock);797}798799static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)800{801struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);802struct amdgpu_cs_parser *p = param;803struct ttm_operation_ctx ctx = {804.interruptible = true,805.no_wait_gpu = false,806.resv = bo->tbo.base.resv807};808uint32_t domain;809int r;810811if (bo->tbo.pin_count)812return 0;813814/* Don't move this buffer if we have depleted our allowance815* to move it. Don't move anything if the threshold is zero.816*/817if (p->bytes_moved < p->bytes_moved_threshold &&818(!bo->tbo.base.dma_buf ||819list_empty(&bo->tbo.base.dma_buf->attachments))) {820if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&821(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {822/* And don't move a CPU_ACCESS_REQUIRED BO to limited823* visible VRAM if we've depleted our allowance to do824* that.825*/826if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)827domain = bo->preferred_domains;828else829domain = bo->allowed_domains;830} else {831domain = bo->preferred_domains;832}833} else {834domain = bo->allowed_domains;835}836837retry:838amdgpu_bo_placement_from_domain(bo, domain);839r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);840841p->bytes_moved += ctx.bytes_moved;842if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&843amdgpu_res_cpu_visible(adev, bo->tbo.resource))844p->bytes_moved_vis += ctx.bytes_moved;845846if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {847domain = bo->allowed_domains;848goto retry;849}850851return r;852}853854static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,855union drm_amdgpu_cs *cs)856{857struct amdgpu_fpriv *fpriv = p->filp->driver_priv;858struct ttm_operation_ctx ctx = { true, false };859struct amdgpu_vm *vm = &fpriv->vm;860struct amdgpu_bo_list_entry *e;861struct drm_gem_object *obj;862unsigned long index;863unsigned int i;864int r;865866/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */867if (cs->in.bo_list_handle) {868if (p->bo_list)869return -EINVAL;870871r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,872&p->bo_list);873if (r)874return r;875} else if (!p->bo_list) {876/* Create a empty bo_list when no handle is provided */877r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,878&p->bo_list);879if (r)880return r;881}882883mutex_lock(&p->bo_list->bo_list_mutex);884885/* Get userptr backing pages. If pages are updated after registered886* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do887* amdgpu_ttm_backend_bind() to flush and invalidate new pages888*/889amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {890bool userpage_invalidated = false;891struct amdgpu_bo *bo = e->bo;892893e->range = amdgpu_hmm_range_alloc(NULL);894if (unlikely(!e->range))895return -ENOMEM;896897r = amdgpu_ttm_tt_get_user_pages(bo, e->range);898if (r)899goto out_free_user_pages;900901for (i = 0; i < bo->tbo.ttm->num_pages; i++) {902if (bo->tbo.ttm->pages[i] !=903hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {904userpage_invalidated = true;905break;906}907}908e->user_invalidated = userpage_invalidated;909}910911drm_exec_until_all_locked(&p->exec) {912r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);913drm_exec_retry_on_contention(&p->exec);914if (unlikely(r))915goto out_free_user_pages;916917amdgpu_bo_list_for_each_entry(e, p->bo_list) {918/* One fence for TTM and one for each CS job */919r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,9201 + p->gang_size);921drm_exec_retry_on_contention(&p->exec);922if (unlikely(r))923goto out_free_user_pages;924925e->bo_va = amdgpu_vm_bo_find(vm, e->bo);926}927928if (p->uf_bo) {929r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,9301 + p->gang_size);931drm_exec_retry_on_contention(&p->exec);932if (unlikely(r))933goto out_free_user_pages;934}935}936937amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {938struct mm_struct *usermm;939940usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);941if (usermm && usermm != current->mm) {942r = -EPERM;943goto out_free_user_pages;944}945946if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&947e->user_invalidated) {948amdgpu_bo_placement_from_domain(e->bo,949AMDGPU_GEM_DOMAIN_CPU);950r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,951&ctx);952if (r)953goto out_free_user_pages;954955amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,956e->range);957}958}959960amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,961&p->bytes_moved_vis_threshold);962p->bytes_moved = 0;963p->bytes_moved_vis = 0;964965r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,966amdgpu_cs_bo_validate, p);967if (r) {968drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");969goto out_free_user_pages;970}971972drm_exec_for_each_locked_object(&p->exec, index, obj) {973r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));974if (unlikely(r))975goto out_free_user_pages;976}977978if (p->uf_bo) {979r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);980if (unlikely(r))981goto out_free_user_pages;982983p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);984}985986amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,987p->bytes_moved_vis);988989for (i = 0; i < p->gang_size; ++i)990amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,991p->bo_list->gws_obj,992p->bo_list->oa_obj);993return 0;994995out_free_user_pages:996amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {997amdgpu_hmm_range_free(e->range);998e->range = NULL;999}1000mutex_unlock(&p->bo_list->bo_list_mutex);1001return r;1002}10031004static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)1005{1006int i, j;10071008if (!trace_amdgpu_cs_enabled())1009return;10101011for (i = 0; i < p->gang_size; ++i) {1012struct amdgpu_job *job = p->jobs[i];10131014for (j = 0; j < job->num_ibs; ++j)1015trace_amdgpu_cs(p, job, &job->ibs[j]);1016}1017}10181019static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,1020struct amdgpu_job *job)1021{1022struct amdgpu_ring *ring = amdgpu_job_ring(job);1023unsigned int i;1024int r;10251026/* Only for UVD/VCE VM emulation */1027if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)1028return 0;10291030for (i = 0; i < job->num_ibs; ++i) {1031struct amdgpu_ib *ib = &job->ibs[i];1032struct amdgpu_bo_va_mapping *m;1033struct amdgpu_bo *aobj;1034uint64_t va_start;1035uint8_t *kptr;10361037va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;1038r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);1039if (r) {1040drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");1041return r;1042}10431044if ((va_start + ib->length_dw * 4) >1045(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {1046drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");1047return -EINVAL;1048}10491050/* the IB should be reserved at this point */1051r = amdgpu_bo_kmap(aobj, (void **)&kptr);1052if (r)1053return r;10541055kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);10561057if (ring->funcs->parse_cs) {1058memcpy(ib->ptr, kptr, ib->length_dw * 4);1059amdgpu_bo_kunmap(aobj);10601061r = amdgpu_ring_parse_cs(ring, p, job, ib);1062if (r)1063return r;10641065if (ib->sa_bo)1066ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);1067} else {1068ib->ptr = (uint32_t *)kptr;1069r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);1070amdgpu_bo_kunmap(aobj);1071if (r)1072return r;1073}1074}10751076return 0;1077}10781079static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)1080{1081unsigned int i;1082int r;10831084for (i = 0; i < p->gang_size; ++i) {1085r = amdgpu_cs_patch_ibs(p, p->jobs[i]);1086if (r)1087return r;1088}1089return 0;1090}10911092static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)1093{1094struct amdgpu_fpriv *fpriv = p->filp->driver_priv;1095struct amdgpu_job *job = p->gang_leader;1096struct amdgpu_device *adev = p->adev;1097struct amdgpu_vm *vm = &fpriv->vm;1098struct amdgpu_bo_list_entry *e;1099struct amdgpu_bo_va *bo_va;1100unsigned int i;1101int r;11021103/*1104* We can't use gang submit on with reserved VMIDs when the VM changes1105* can't be invalidated by more than one engine at the same time.1106*/1107if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {1108for (i = 0; i < p->gang_size; ++i) {1109struct drm_sched_entity *entity = p->entities[i];1110struct drm_gpu_scheduler *sched = entity->rq->sched;1111struct amdgpu_ring *ring = to_amdgpu_ring(sched);11121113if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))1114return -EINVAL;1115}1116}11171118if (!amdgpu_vm_ready(vm))1119return -EINVAL;11201121r = amdgpu_vm_clear_freed(adev, vm, NULL);1122if (r)1123return r;11241125r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);1126if (r)1127return r;11281129r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,1130GFP_KERNEL);1131if (r)1132return r;11331134if (fpriv->csa_va) {1135bo_va = fpriv->csa_va;1136BUG_ON(!bo_va);1137r = amdgpu_vm_bo_update(adev, bo_va, false);1138if (r)1139return r;11401141r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,1142GFP_KERNEL);1143if (r)1144return r;1145}11461147/* FIXME: In theory this loop shouldn't be needed any more when1148* amdgpu_vm_handle_moved handles all moved BOs that are reserved1149* with p->ticket. But removing it caused test regressions, so I'm1150* leaving it here for now.1151*/1152amdgpu_bo_list_for_each_entry(e, p->bo_list) {1153bo_va = e->bo_va;1154if (bo_va == NULL)1155continue;11561157r = amdgpu_vm_bo_update(adev, bo_va, false);1158if (r)1159return r;11601161r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,1162GFP_KERNEL);1163if (r)1164return r;1165}11661167r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);1168if (r)1169return r;11701171r = amdgpu_vm_update_pdes(adev, vm, false);1172if (r)1173return r;11741175r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);1176if (r)1177return r;11781179for (i = 0; i < p->gang_size; ++i) {1180job = p->jobs[i];11811182if (!job->vm)1183continue;11841185job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);1186}11871188if (adev->debug_vm) {1189/* Invalidate all BOs to test for userspace bugs */1190amdgpu_bo_list_for_each_entry(e, p->bo_list) {1191struct amdgpu_bo *bo = e->bo;11921193/* ignore duplicates */1194if (!bo)1195continue;11961197amdgpu_vm_bo_invalidate(bo, false);1198}1199}12001201return 0;1202}12031204static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)1205{1206struct amdgpu_fpriv *fpriv = p->filp->driver_priv;1207struct drm_gpu_scheduler *sched;1208struct drm_gem_object *obj;1209struct dma_fence *fence;1210unsigned long index;1211unsigned int i;1212int r;12131214r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);1215if (r) {1216if (r != -ERESTARTSYS)1217drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");1218return r;1219}12201221drm_exec_for_each_locked_object(&p->exec, index, obj) {1222struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);12231224struct dma_resv *resv = bo->tbo.base.resv;1225enum amdgpu_sync_mode sync_mode;12261227sync_mode = amdgpu_bo_explicit_sync(bo) ?1228AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;1229r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,1230&fpriv->vm);1231if (r)1232return r;1233}12341235for (i = 0; i < p->gang_size; ++i) {1236r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);1237if (r)1238return r;1239}12401241sched = p->gang_leader->base.entity->rq->sched;1242while ((fence = amdgpu_sync_get_fence(&p->sync))) {1243struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);12441245/*1246* When we have an dependency it might be necessary to insert a1247* pipeline sync to make sure that all caches etc are flushed and the1248* next job actually sees the results from the previous one1249* before we start executing on the same scheduler ring.1250*/1251if (!s_fence || s_fence->sched != sched) {1252dma_fence_put(fence);1253continue;1254}12551256r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,1257GFP_KERNEL);1258dma_fence_put(fence);1259if (r)1260return r;1261}1262return 0;1263}12641265static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)1266{1267int i;12681269for (i = 0; i < p->num_post_deps; ++i) {1270if (p->post_deps[i].chain && p->post_deps[i].point) {1271drm_syncobj_add_point(p->post_deps[i].syncobj,1272p->post_deps[i].chain,1273p->fence, p->post_deps[i].point);1274p->post_deps[i].chain = NULL;1275} else {1276drm_syncobj_replace_fence(p->post_deps[i].syncobj,1277p->fence);1278}1279}1280}12811282static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,1283union drm_amdgpu_cs *cs)1284{1285struct amdgpu_fpriv *fpriv = p->filp->driver_priv;1286struct amdgpu_job *leader = p->gang_leader;1287struct amdgpu_bo_list_entry *e;1288struct drm_gem_object *gobj;1289unsigned long index;1290unsigned int i;1291uint64_t seq;1292int r;12931294for (i = 0; i < p->gang_size; ++i)1295drm_sched_job_arm(&p->jobs[i]->base);12961297for (i = 0; i < p->gang_size; ++i) {1298struct dma_fence *fence;12991300if (p->jobs[i] == leader)1301continue;13021303fence = &p->jobs[i]->base.s_fence->scheduled;1304dma_fence_get(fence);1305r = drm_sched_job_add_dependency(&leader->base, fence);1306if (r) {1307dma_fence_put(fence);1308return r;1309}1310}13111312if (p->gang_size > 1) {1313for (i = 0; i < p->gang_size; ++i)1314amdgpu_job_set_gang_leader(p->jobs[i], leader);1315}13161317/* No memory allocation is allowed while holding the notifier lock.1318* The lock is held until amdgpu_cs_submit is finished and fence is1319* added to BOs.1320*/1321mutex_lock(&p->adev->notifier_lock);13221323/* If userptr are invalidated after amdgpu_cs_parser_bos(), return1324* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.1325*/1326r = 0;1327amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {1328r |= !amdgpu_hmm_range_valid(e->range);1329amdgpu_hmm_range_free(e->range);1330e->range = NULL;1331}1332if (r) {1333r = -EAGAIN;1334mutex_unlock(&p->adev->notifier_lock);1335return r;1336}13371338p->fence = dma_fence_get(&leader->base.s_fence->finished);1339drm_exec_for_each_locked_object(&p->exec, index, gobj) {13401341ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);13421343/* Everybody except for the gang leader uses READ */1344for (i = 0; i < p->gang_size; ++i) {1345if (p->jobs[i] == leader)1346continue;13471348dma_resv_add_fence(gobj->resv,1349&p->jobs[i]->base.s_fence->finished,1350DMA_RESV_USAGE_READ);1351}13521353/* The gang leader as remembered as writer */1354dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);1355}13561357seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],1358p->fence);1359amdgpu_cs_post_dependencies(p);13601361if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&1362!p->ctx->preamble_presented) {1363leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;1364p->ctx->preamble_presented = true;1365}13661367cs->out.handle = seq;1368leader->uf_sequence = seq;13691370amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);1371for (i = 0; i < p->gang_size; ++i) {1372amdgpu_job_free_resources(p->jobs[i]);1373trace_amdgpu_cs_ioctl(p->jobs[i]);1374drm_sched_entity_push_job(&p->jobs[i]->base);1375p->jobs[i] = NULL;1376}13771378amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);13791380mutex_unlock(&p->adev->notifier_lock);1381mutex_unlock(&p->bo_list->bo_list_mutex);1382return 0;1383}13841385/* Cleanup the parser structure */1386static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)1387{1388unsigned int i;13891390amdgpu_sync_free(&parser->sync);1391drm_exec_fini(&parser->exec);13921393for (i = 0; i < parser->num_post_deps; i++) {1394drm_syncobj_put(parser->post_deps[i].syncobj);1395kfree(parser->post_deps[i].chain);1396}1397kfree(parser->post_deps);13981399dma_fence_put(parser->fence);14001401if (parser->ctx)1402amdgpu_ctx_put(parser->ctx);1403if (parser->bo_list)1404amdgpu_bo_list_put(parser->bo_list);14051406for (i = 0; i < parser->nchunks; i++)1407kvfree(parser->chunks[i].kdata);1408kvfree(parser->chunks);1409for (i = 0; i < parser->gang_size; ++i) {1410if (parser->jobs[i])1411amdgpu_job_free(parser->jobs[i]);1412}1413amdgpu_bo_unref(&parser->uf_bo);1414}14151416int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)1417{1418struct amdgpu_device *adev = drm_to_adev(dev);1419struct amdgpu_cs_parser parser;1420int r;14211422if (amdgpu_ras_intr_triggered())1423return -EHWPOISON;14241425if (!adev->accel_working)1426return -EBUSY;14271428r = amdgpu_cs_parser_init(&parser, adev, filp, data);1429if (r) {1430drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);1431return r;1432}14331434r = amdgpu_cs_pass1(&parser, data);1435if (r)1436goto error_fini;14371438r = amdgpu_cs_pass2(&parser);1439if (r)1440goto error_fini;14411442r = amdgpu_cs_parser_bos(&parser, data);1443if (r) {1444if (r == -ENOMEM)1445drm_err(dev, "Not enough memory for command submission!\n");1446else if (r != -ERESTARTSYS && r != -EAGAIN)1447drm_dbg(dev, "Failed to process the buffer list %d!\n", r);1448goto error_fini;1449}14501451r = amdgpu_cs_patch_jobs(&parser);1452if (r)1453goto error_backoff;14541455r = amdgpu_cs_vm_handling(&parser);1456if (r)1457goto error_backoff;14581459r = amdgpu_cs_sync_rings(&parser);1460if (r)1461goto error_backoff;14621463trace_amdgpu_cs_ibs(&parser);14641465r = amdgpu_cs_submit(&parser, data);1466if (r)1467goto error_backoff;14681469amdgpu_cs_parser_fini(&parser);1470return 0;14711472error_backoff:1473mutex_unlock(&parser.bo_list->bo_list_mutex);14741475error_fini:1476amdgpu_cs_parser_fini(&parser);1477return r;1478}14791480/**1481* amdgpu_cs_wait_ioctl - wait for a command submission to finish1482*1483* @dev: drm device1484* @data: data from userspace1485* @filp: file private1486*1487* Wait for the command submission identified by handle to finish.1488*/1489int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,1490struct drm_file *filp)1491{1492union drm_amdgpu_wait_cs *wait = data;1493unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);1494struct drm_sched_entity *entity;1495struct amdgpu_ctx *ctx;1496struct dma_fence *fence;1497long r;14981499ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);1500if (ctx == NULL)1501return -EINVAL;15021503r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,1504wait->in.ring, &entity);1505if (r) {1506amdgpu_ctx_put(ctx);1507return r;1508}15091510fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);1511if (IS_ERR(fence))1512r = PTR_ERR(fence);1513else if (fence) {1514r = dma_fence_wait_timeout(fence, true, timeout);1515if (r > 0 && fence->error)1516r = fence->error;1517dma_fence_put(fence);1518} else1519r = 1;15201521amdgpu_ctx_put(ctx);1522if (r < 0)1523return r;15241525memset(wait, 0, sizeof(*wait));1526wait->out.status = (r == 0);15271528return 0;1529}15301531/**1532* amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence1533*1534* @adev: amdgpu device1535* @filp: file private1536* @user: drm_amdgpu_fence copied from user space1537*/1538static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,1539struct drm_file *filp,1540struct drm_amdgpu_fence *user)1541{1542struct drm_sched_entity *entity;1543struct amdgpu_ctx *ctx;1544struct dma_fence *fence;1545int r;15461547ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);1548if (ctx == NULL)1549return ERR_PTR(-EINVAL);15501551r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,1552user->ring, &entity);1553if (r) {1554amdgpu_ctx_put(ctx);1555return ERR_PTR(r);1556}15571558fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);1559amdgpu_ctx_put(ctx);15601561return fence;1562}15631564int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,1565struct drm_file *filp)1566{1567struct amdgpu_device *adev = drm_to_adev(dev);1568union drm_amdgpu_fence_to_handle *info = data;1569struct dma_fence *fence;1570struct drm_syncobj *syncobj;1571struct sync_file *sync_file;1572int fd, r;15731574fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);1575if (IS_ERR(fence))1576return PTR_ERR(fence);15771578if (!fence)1579fence = dma_fence_get_stub();15801581switch (info->in.what) {1582case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:1583r = drm_syncobj_create(&syncobj, 0, fence);1584dma_fence_put(fence);1585if (r)1586return r;1587r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);1588drm_syncobj_put(syncobj);1589return r;15901591case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:1592r = drm_syncobj_create(&syncobj, 0, fence);1593dma_fence_put(fence);1594if (r)1595return r;1596r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);1597drm_syncobj_put(syncobj);1598return r;15991600case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:1601fd = get_unused_fd_flags(O_CLOEXEC);1602if (fd < 0) {1603dma_fence_put(fence);1604return fd;1605}16061607sync_file = sync_file_create(fence);1608dma_fence_put(fence);1609if (!sync_file) {1610put_unused_fd(fd);1611return -ENOMEM;1612}16131614fd_install(fd, sync_file->file);1615info->out.handle = fd;1616return 0;16171618default:1619dma_fence_put(fence);1620return -EINVAL;1621}1622}16231624/**1625* amdgpu_cs_wait_all_fences - wait on all fences to signal1626*1627* @adev: amdgpu device1628* @filp: file private1629* @wait: wait parameters1630* @fences: array of drm_amdgpu_fence1631*/1632static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,1633struct drm_file *filp,1634union drm_amdgpu_wait_fences *wait,1635struct drm_amdgpu_fence *fences)1636{1637uint32_t fence_count = wait->in.fence_count;1638unsigned int i;1639long r = 1;16401641for (i = 0; i < fence_count; i++) {1642struct dma_fence *fence;1643unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);16441645fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);1646if (IS_ERR(fence))1647return PTR_ERR(fence);1648else if (!fence)1649continue;16501651r = dma_fence_wait_timeout(fence, true, timeout);1652if (r > 0 && fence->error)1653r = fence->error;16541655dma_fence_put(fence);1656if (r < 0)1657return r;16581659if (r == 0)1660break;1661}16621663memset(wait, 0, sizeof(*wait));1664wait->out.status = (r > 0);16651666return 0;1667}16681669/**1670* amdgpu_cs_wait_any_fence - wait on any fence to signal1671*1672* @adev: amdgpu device1673* @filp: file private1674* @wait: wait parameters1675* @fences: array of drm_amdgpu_fence1676*/1677static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,1678struct drm_file *filp,1679union drm_amdgpu_wait_fences *wait,1680struct drm_amdgpu_fence *fences)1681{1682unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);1683uint32_t fence_count = wait->in.fence_count;1684uint32_t first = ~0;1685struct dma_fence **array;1686unsigned int i;1687long r;16881689/* Prepare the fence array */1690array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);16911692if (array == NULL)1693return -ENOMEM;16941695for (i = 0; i < fence_count; i++) {1696struct dma_fence *fence;16971698fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);1699if (IS_ERR(fence)) {1700r = PTR_ERR(fence);1701goto err_free_fence_array;1702} else if (fence) {1703array[i] = fence;1704} else { /* NULL, the fence has been already signaled */1705r = 1;1706first = i;1707goto out;1708}1709}17101711r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,1712&first);1713if (r < 0)1714goto err_free_fence_array;17151716out:1717memset(wait, 0, sizeof(*wait));1718wait->out.status = (r > 0);1719wait->out.first_signaled = first;17201721if (first < fence_count && array[first])1722r = array[first]->error;1723else1724r = 0;17251726err_free_fence_array:1727for (i = 0; i < fence_count; i++)1728dma_fence_put(array[i]);1729kfree(array);17301731return r;1732}17331734/**1735* amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish1736*1737* @dev: drm device1738* @data: data from userspace1739* @filp: file private1740*/1741int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,1742struct drm_file *filp)1743{1744struct amdgpu_device *adev = drm_to_adev(dev);1745union drm_amdgpu_wait_fences *wait = data;1746struct drm_amdgpu_fence *fences;1747int r;17481749/* Get the fences from userspace */1750fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),1751wait->in.fence_count,1752sizeof(struct drm_amdgpu_fence));1753if (IS_ERR(fences))1754return PTR_ERR(fences);17551756if (wait->in.wait_all)1757r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);1758else1759r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);17601761kfree(fences);17621763return r;1764}17651766/**1767* amdgpu_cs_find_mapping - find bo_va for VM address1768*1769* @parser: command submission parser context1770* @addr: VM address1771* @bo: resulting BO of the mapping found1772* @map: Placeholder to return found BO mapping1773*1774* Search the buffer objects in the command submission context for a certain1775* virtual memory address. Returns allocation structure when found, NULL1776* otherwise.1777*/1778int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,1779uint64_t addr, struct amdgpu_bo **bo,1780struct amdgpu_bo_va_mapping **map)1781{1782struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;1783struct ttm_operation_ctx ctx = { false, false };1784struct amdgpu_vm *vm = &fpriv->vm;1785struct amdgpu_bo_va_mapping *mapping;1786int i, r;17871788addr /= AMDGPU_GPU_PAGE_SIZE;17891790mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);1791if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)1792return -EINVAL;17931794*bo = mapping->bo_va->base.bo;1795*map = mapping;17961797/* Double check that the BO is reserved by this CS */1798if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)1799return -EINVAL;18001801/* Make sure VRAM is allocated contigiously */1802(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;1803if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&1804!((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {18051806amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);1807for (i = 0; i < (*bo)->placement.num_placement; i++)1808(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;1809r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);1810if (r)1811return r;1812}18131814return amdgpu_ttm_alloc_gart(&(*bo)->tbo);1815}181618171818