Path: blob/21.2-virgl/src/panfrost/lib/pan_scoreboard.c
4560 views
/*1* Copyright (C) 2019 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22*/2324#include <string.h>25#include "pan_scoreboard.h"26#include "pan_device.h"27#include "panfrost-quirks.h"2829/*30* There are various types of Mali jobs:31*32* - WRITE_VALUE: generic write primitive, used to zero tiler field33* - VERTEX: runs a vertex shader34* - TILER: runs tiling and sets up a fragment shader35* - FRAGMENT: runs fragment shaders and writes out36* - COMPUTE: runs a compute shader37* - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)38* - GEOMETRY: runs a geometry shader (unimplemented)39* - CACHE_FLUSH: unseen in the wild, theoretically cache flush40*41* In between a full batch and a single Mali job is the "job chain", a series42* of Mali jobs together forming a linked list. Within the job chain, each Mali43* job can set (up to) two dependencies on other earlier jobs in the chain.44* This dependency graph forms a scoreboard. The general idea of a scoreboard45* applies: when there is a data dependency of job B on job A, job B sets one46* of its dependency indices to job A, ensuring that job B won't start until47* job A finishes.48*49* More specifically, here are a set of rules:50*51* - A write value job must appear if and only if there is at least one tiler52* job, and tiler jobs must depend on it.53*54* - Vertex jobs and tiler jobs are independent.55*56* - A tiler job must have a dependency on its data source. If it's getting57* data from a vertex job, it depends on the vertex job. If it's getting data58* from software, this is null.59*60* - Tiler jobs must depend on the write value job (chained or otherwise).61*62* - Tiler jobs must be strictly ordered. So each tiler job must depend on the63* previous job in the chain.64*65* - Jobs linking via next_job has no bearing on order of execution, rather it66* just establishes the linked list of jobs, EXCEPT:67*68* - A job's dependencies must appear earlier in the linked list (job chain).69*70* Justification for each rule:71*72* - Write value jobs are used to write a zero into a magic tiling field, which73* enables tiling to work. If tiling occurs, they are needed; if it does not,74* we cannot emit them since then tiling partially occurs and it's bad.75*76* - The hardware has no notion of a "vertex/tiler job" (at least not our77* hardware -- other revs have fused jobs, but --- crap, this just got even78* more complicated). They are independent units that take in data, process79* it, and spit out data.80*81* - Any job must depend on its data source, in fact, or risk a82* read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo83* tiler jobs depend on the corresponding vertex job (if it's there).84*85* - The tiler is not thread-safe; this dependency prevents race conditions86* between two different jobs trying to write to the tiler outputs at the87* same time.88*89* - Internally, jobs are scoreboarded; the next job fields just form a linked90* list to allow the jobs to be read in; the execution order is from91* resolving the dependency fields instead.92*93* - The hardware cannot set a dependency on a job it doesn't know about yet,94* and dependencies are processed in-order of the next job fields.95*96*/9798/* Generates, uploads, and queues a a new job. All fields are written in order99* except for next_job accounting (TODO: Should we be clever and defer the100* upload of the header here until next job to keep the access pattern totally101* linear? Or is that just a micro op at this point?). Returns the generated102* index for dep management.103*104* Inject is used to inject a job at the front, for wallpapering. If you are105* not wallpapering and set this, dragons will eat you. */106107unsigned108panfrost_add_job(109struct pan_pool *pool,110struct pan_scoreboard *scoreboard,111enum mali_job_type type,112bool barrier, bool suppress_prefetch,113unsigned local_dep, unsigned global_dep,114const struct panfrost_ptr *job,115bool inject)116{117if (type == MALI_JOB_TYPE_TILER) {118/* Tiler jobs must be chained, and on Midgard, the first tiler119* job must depend on the write value job, whose index we120* reserve now */121122if (!pan_is_bifrost(pool->dev) && !scoreboard->write_value_index)123scoreboard->write_value_index = ++scoreboard->job_index;124125if (scoreboard->tiler_dep && !inject)126global_dep = scoreboard->tiler_dep;127else if (!pan_is_bifrost(pool->dev))128global_dep = scoreboard->write_value_index;129}130131/* Assign the index */132unsigned index = ++scoreboard->job_index;133134pan_pack(job->cpu, JOB_HEADER, header) {135header.type = type;136header.barrier = barrier;137header.suppress_prefetch = suppress_prefetch;138header.index = index;139header.dependency_1 = local_dep;140header.dependency_2 = global_dep;141142if (inject)143header.next = scoreboard->first_job;144}145146if (inject) {147assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders");148149if (scoreboard->first_tiler) {150/* Manual update of the dep2 field. This is bad,151* don't copy this pattern.152*/153scoreboard->first_tiler->opaque[5] =154scoreboard->first_tiler_dep1 | (index << 16);155}156157scoreboard->first_tiler = (void *)job->cpu;158scoreboard->first_tiler_dep1 = local_dep;159scoreboard->first_job = job->gpu;160return index;161}162163/* Form a chain */164if (type == MALI_JOB_TYPE_TILER) {165if (!scoreboard->first_tiler) {166scoreboard->first_tiler = (void *)job->cpu;167scoreboard->first_tiler_dep1 = local_dep;168}169scoreboard->tiler_dep = index;170}171172if (scoreboard->prev_job) {173/* Manual update of the next pointer. This is bad, don't copy174* this pattern.175* TODO: Find a way to defer last job header emission until we176* have a new job to queue or the batch is ready for execution.177*/178scoreboard->prev_job->opaque[6] = job->gpu;179scoreboard->prev_job->opaque[7] = job->gpu >> 32;180} else {181scoreboard->first_job = job->gpu;182}183184scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu;185return index;186}187188/* Generates a write value job, used to initialize the tiler structures. Note189* this is called right before frame submission. */190191struct panfrost_ptr192panfrost_scoreboard_initialize_tiler(struct pan_pool *pool,193struct pan_scoreboard *scoreboard,194mali_ptr polygon_list)195{196struct panfrost_ptr transfer = { 0 };197198/* Check if we even need tiling */199if (pan_is_bifrost(pool->dev) || !scoreboard->first_tiler)200return transfer;201202/* Okay, we do. Let's generate it. We'll need the job's polygon list203* regardless of size. */204205transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB);206207pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) {208header.type = MALI_JOB_TYPE_WRITE_VALUE;209header.index = scoreboard->write_value_index;210header.next = scoreboard->first_job;211}212213pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {214payload.address = polygon_list;215payload.type = MALI_WRITE_VALUE_TYPE_ZERO;216}217218scoreboard->first_job = transfer.gpu;219return transfer;220}221222223