Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_autotune.c
4570 views
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "freedreno_autotune.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"

/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
 * recent sample-passed query results used to decide between GMEM and
 * bypass (sysmem) rendering.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
   struct fd_batch_key *key;

   /* Entry in fd_autotune::lru: */
   struct list_head node;

   /* Number of entries currently on the 'results' list (capped at
    * MAX_RESULTS; see process_results()):
    */
   unsigned num_results;

   /**
    * List of recent fd_batch_result's
    */
   struct list_head results;
#define MAX_RESULTS 5
};

/**
 * Look up (or create) the history entry for this batch's key, moving it to
 * the head of the LRU.  Returns NULL if the batch has no key (i.e. no
 * framebuffer state to track against).
 *
 * The table is capped: once it holds 40 entries, the least-recently-used
 * history is evicted before inserting a new one.
 */
static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
   struct fd_batch_history *history;

   if (!batch->key)
      return NULL;

   /* batch->hash is the pre-computed hash of batch->key, so we can use the
    * pre-hashed search/insert variants:
    */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);

   if (entry) {
      history = entry->data;
      goto found;
   }

   /* ralloc parent is the hash table, so destroying the table frees all
    * histories (and, transitively, their cloned keys and results):
    */
   history = rzalloc_size(at->ht, sizeof(*history));

   history->key = fd_batch_key_clone(history, batch->key);
   list_inithead(&history->node);
   list_inithead(&history->results);

   /* Note: We cap # of cached GMEM states at 20.. so assuming double-
    * buffering, 40 should be a good place to cap cached autotune state
    */
   if (at->ht->entries >= 40) {
      struct fd_batch_history *last =
         list_last_entry(&at->lru, struct fd_batch_history, node);
      _mesa_hash_table_remove_key(at->ht, last->key);
      list_del(&last->node);
      /* Frees last->key and any attached results too (ralloc children): */
      ralloc_free(last);
   }

   _mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
                                      history);

found:
   /* Move to the head of the LRU: */
   list_delinit(&history->node);
   list_add(&history->node, &at->lru);

   return history;
}

/* ralloc destructor for fd_batch_result: */
static void
result_destructor(void *r)
{
   struct fd_batch_result *result = r;

   /* Just in case we manage to somehow still be on the pending_results list: */
   list_del(&result->node);
}

/**
 * Allocate a new result for this batch, assign it the next fence value and
 * the next slot index in the shared results buffer, and queue it on the
 * pending_results list until process_results() sees its fence signaled.
 *
 * NOTE(review): idx_counter wraps at ARRAY_SIZE(at->results->result), so if
 * more than that many results are in flight at once, an in-flight slot gets
 * reused — presumably the buffer is sized so this doesn't happen in
 * practice; confirm against fd_autotune_results.
 */
static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
   /* ralloc parent is the history, so results die with their history: */
   struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));

   result->fence =
      ++at->fence_counter; /* pre-increment so zero isn't valid fence */
   result->idx = at->idx_counter++;

   if (at->idx_counter >= ARRAY_SIZE(at->results->result))
      at->idx_counter = 0;

   result->history = history;
   list_addtail(&result->node, &at->pending_results);

   ralloc_set_destructor(result, result_destructor);

   return result;
}

/**
 * Drain pending results whose fence has been reached (at->results->fence is
 * read from the mapped results BO — presumably written back by the GPU;
 * results with a larger fence are not ready yet, and the list is in fence
 * order so we stop at the first unready one).
 *
 * Each ready result's samples-passed delta is computed from its slot in the
 * results buffer, and the result is moved onto its history's list, evicting
 * the oldest once the history holds MAX_RESULTS.
 */
static void
process_results(struct fd_autotune *at)
{
   uint32_t current_fence = at->results->fence;

   list_for_each_entry_safe (struct fd_batch_result, result,
                             &at->pending_results, node) {
      if (result->fence > current_fence)
         break;

      struct fd_batch_history *history = result->history;

      result->samples_passed = at->results->result[result->idx].samples_end -
                               at->results->result[result->idx].samples_start;

      list_delinit(&result->node);
      list_add(&result->node, &history->results);

      if (history->num_results < MAX_RESULTS) {
         history->num_results++;
      } else {
         /* Once above a limit, start popping old results off the
          * tail of the list:
          */
         struct fd_batch_result *old_result =
            list_last_entry(&history->results, struct fd_batch_result, node);
         list_delinit(&old_result->node);
         ralloc_free(old_result);
      }
   }
}

/**
 * Heuristic used when we have no sample-passed history for this render
 * target: prefer GMEM whenever there were clears, a recorded gmem_reason,
 * more than a handful of draws, or MSAA.
 */
static bool
fallback_use_bypass(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /* Fallback logic if we have no historical data about the rendertarget: */
   if (batch->cleared || batch->gmem_reason ||
       (batch->num_draws > 5) || (pfb->samples > 1)) {
      return false;
   }

   return true;
}

/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for moar fps.
 *
 * Returns true to render in bypass (sysmem) mode, false to use GMEM.  Also
 * attaches a new autotune result to the batch (batch->autotune_result) so
 * this batch's sample counts feed future decisions.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   process_results(at);

   /* Only enable on gen's that opt-in (and actually have sample-passed
    * collection wired up:
    */
   if (!batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   /* Any gmem_reason bit outside the mask forces the fallback path: */
   if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
      return fallback_use_bypass(batch);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      /* If ms-rtt is involved, force GMEM, as we don't currently
       * implement a temporary render target that we can MSAA resolve
       * from
       */
      if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
         return fallback_use_bypass(batch);
   }

   struct fd_batch_history *history = get_history(at, batch);
   if (!history)
      return fallback_use_bypass(batch);

   batch->autotune_result = get_result(at, history);
   batch->autotune_result->cost = batch->cost;

   bool use_bypass = fallback_use_bypass(batch);

   if (use_bypass)
      return true;

   if (history->num_results > 0) {
      uint32_t total_samples = 0;

      // TODO we should account for clears somehow
      // TODO should we try to notice if there is a drastic change from
      // frame to frame?
      list_for_each_entry (struct fd_batch_result, result, &history->results,
                           node) {
         total_samples += result->samples_passed;
      }

      float avg_samples = (float)total_samples / (float)history->num_results;

      /* Low sample count could mean there was only a clear.. or there was
       * a clear plus draws that touch no or few samples
       */
      if (avg_samples < 500.0)
         return true;

      /* Cost-per-sample is an estimate for the average number of reads+
       * writes for a given passed sample.
       *
       * NOTE(review): this actually divides by num_draws (not by samples),
       * and num_draws divides total_draw_cost again below — presumably an
       * intentional weighting, but worth confirming against the upstream
       * tuning commits.
       */
      float sample_cost = batch->cost;
      sample_cost /= batch->num_draws;

      float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
      DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
          "total_draw_cost=%f\n",
          batch->hash, batch->num_draws, total_samples, avg_samples,
          sample_cost, total_draw_cost);

      /* Cheap-enough batches go bypass; expensive ones benefit from GMEM: */
      if (total_draw_cost < 3000.0)
         return true;
   }

   return use_bypass;
}

/**
 * One-time setup: create the history hash table / LRU, and allocate + map
 * the BO that the GPU writes sample-passed results (and the fence) into.
 */
void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
   at->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
   list_inithead(&at->lru);

   at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
                               0, "autotune");
   at->results = fd_bo_map(at->results_mem);

   list_inithead(&at->pending_results);
}

/**
 * Teardown: destroying the hash table frees all histories (and their
 * results, via ralloc parentage); the result destructors unlink anything
 * still on pending_results.
 */
void
fd_autotune_fini(struct fd_autotune *at)
{
   _mesa_hash_table_destroy(at->ht, NULL);
   fd_bo_del(at->results_mem);
}