Path: blob/21.2-virgl/src/panfrost/lib/pan_blitter.c
4560 views
/*1* Copyright (C) 2020-2021 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Alyssa Rosenzweig <[email protected]>24* Boris Brezillon <[email protected]>25*/2627#include <math.h>28#include <stdio.h>29#include "pan_blend.h"30#include "pan_blitter.h"31#include "pan_cs.h"32#include "pan_encoder.h"33#include "pan_pool.h"34#include "pan_shader.h"35#include "pan_scoreboard.h"36#include "pan_texture.h"37#include "panfrost-quirks.h"38#include "compiler/nir/nir_builder.h"39#include "util/u_math.h"4041/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or42* missing in many cases. We instead use software paths as fallbacks to43* implement blits, which are done as TILER jobs. No vertex shader is44* necessary since we can supply screen-space coordinates directly.45*46* This is primarily designed as a fallback for preloads but could be extended47* for other clears/blits if needed in the future. */4849static enum mali_bifrost_register_file_format50blit_type_to_reg_fmt(nir_alu_type in)51{52switch (in) {53case nir_type_float32:54return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;55case nir_type_int32:56return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;57case nir_type_uint32:58return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;59default:60unreachable("Invalid blit type");61}62}6364struct pan_blit_surface {65gl_frag_result loc : 4;66nir_alu_type type : 8;67enum mali_texture_dimension dim : 2;68bool array : 1;69unsigned src_samples: 5;70unsigned dst_samples: 5;71};7273struct pan_blit_shader_key {74struct pan_blit_surface surfaces[8];75};7677struct pan_blit_shader_data {78struct pan_blit_shader_key key;79mali_ptr address;80unsigned blend_ret_offsets[8];81nir_alu_type blend_types[8];82};8384struct pan_blit_blend_shader_key {85enum pipe_format format;86nir_alu_type type;87unsigned rt : 3;88unsigned nr_samples : 5;89};9091struct pan_blit_blend_shader_data {92struct pan_blit_blend_shader_key key;93mali_ptr address;94};9596struct pan_blit_rsd_key {97struct {98enum pipe_format format;99nir_alu_type type : 8;100unsigned src_samples : 5;101unsigned dst_samples : 5;102enum mali_texture_dimension dim : 2;103bool array : 1;104} rts[8], z, s;105};106107struct pan_blit_rsd_data {108struct pan_blit_rsd_key key;109mali_ptr address;110};111112static void113pan_blitter_prepare_midgard_rsd(const struct panfrost_device *dev,114const struct pan_image_view **rts,115mali_ptr *blend_shaders, bool zs,116struct MALI_RENDERER_STATE *rsd)117{118mali_ptr blend_shader = blend_shaders ? blend_shaders[0] : 0;119120rsd->properties.midgard.work_register_count = 4;121rsd->properties.midgard.force_early_z = !zs;122rsd->stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;123if (!(dev->quirks & MIDGARD_SFBD)) {124rsd->sfbd_blend_shader = blend_shader;125return;126}127128rsd->stencil_mask_misc.sfbd_write_enable = true;129rsd->stencil_mask_misc.sfbd_dither_disable = true;130rsd->multisample_misc.sfbd_blend_shader = !!blend_shader;131rsd->sfbd_blend_shader = blend_shader;132if (rsd->multisample_misc.sfbd_blend_shader)133return;134135rsd->sfbd_blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;136rsd->sfbd_blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;137rsd->sfbd_blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;138rsd->sfbd_blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;139rsd->sfbd_blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;140rsd->sfbd_blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;141rsd->sfbd_blend_constant = 0;142143if (rts && rts[0]) {144rsd->stencil_mask_misc.sfbd_srgb =145util_format_is_srgb(rts[0]->format);146rsd->sfbd_blend_equation.color_mask = 0xf;147}148}149150static void151pan_blitter_prepare_bifrost_rsd(const struct panfrost_device *dev,152bool zs, bool ms,153struct MALI_RENDERER_STATE *rsd)154{155if (zs) {156rsd->properties.bifrost.zs_update_operation =157MALI_PIXEL_KILL_FORCE_LATE;158rsd->properties.bifrost.pixel_kill_operation =159MALI_PIXEL_KILL_FORCE_LATE;160} else {161rsd->properties.bifrost.zs_update_operation =162MALI_PIXEL_KILL_STRONG_EARLY;163rsd->properties.bifrost.pixel_kill_operation =164MALI_PIXEL_KILL_FORCE_EARLY;165}166167/* We can only allow blit shader fragments to kill if they write all168* colour outputs. This is true for our colour (non-Z/S) blit shaders,169* but obviously not true for Z/S shaders. However, blit shaders170* otherwise lack side effects, so other fragments may kill them.171* However, while shaders writing Z/S can normally be killed, on v6172* for frame shaders it can cause GPU timeouts, so only allow colour173* blit shaders to be killed. */174175rsd->properties.bifrost.allow_forward_pixel_to_kill = !zs;176rsd->properties.bifrost.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs;177178rsd->preload.fragment.coverage = true;179rsd->preload.fragment.sample_mask_id = ms;180}181182static void183pan_blitter_emit_midgard_blend(const struct panfrost_device *dev,184unsigned rt,185const struct pan_image_view *iview,186mali_ptr blend_shader,187void *out)188{189assert(!(dev->quirks & MIDGARD_SFBD));190191pan_pack(out, BLEND, cfg) {192if (!iview) {193cfg.midgard.equation.color_mask = 0xf;194cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;195cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;196cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;197cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;198cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;199cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;200continue;201}202203cfg.round_to_fb_precision = true;204cfg.srgb = util_format_is_srgb(iview->format);205206if (!blend_shader) {207cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;208cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;209cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;210cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;211cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;212cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;213cfg.midgard.equation.color_mask = 0xf;214} else {215cfg.midgard.blend_shader = true;216cfg.midgard.shader_pc = blend_shader;217}218}219}220221static void222pan_blitter_emit_bifrost_blend(const struct panfrost_device *dev,223unsigned rt,224const struct pan_image_view *iview,225const struct pan_blit_shader_data *blit_shader,226mali_ptr blend_shader,227void *out)228{229pan_pack(out, BLEND, cfg) {230if (!iview) {231cfg.enable = false;232cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;233continue;234}235236nir_alu_type type = blit_shader->key.surfaces[rt].type;237238cfg.round_to_fb_precision = true;239cfg.srgb = util_format_is_srgb(iview->format);240cfg.bifrost.internal.mode = blend_shader ?241MALI_BIFROST_BLEND_MODE_SHADER :242MALI_BIFROST_BLEND_MODE_OPAQUE;243if (blend_shader) {244cfg.bifrost.internal.shader.pc = blend_shader;245if (blit_shader->blend_ret_offsets[rt]) {246cfg.bifrost.internal.shader.return_value =247blit_shader->address +248blit_shader->blend_ret_offsets[rt];249}250} else {251cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;252cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;253cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;254cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;255cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;256cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;257cfg.bifrost.equation.color_mask = 0xf;258cfg.bifrost.internal.fixed_function.num_comps = 4;259cfg.bifrost.internal.fixed_function.conversion.memory_format =260panfrost_format_to_bifrost_blend(dev, iview->format);261cfg.bifrost.internal.fixed_function.conversion.register_format =262blit_type_to_reg_fmt(type);263264cfg.bifrost.internal.fixed_function.rt = rt;265}266}267}268269static void270pan_blitter_emit_rsd(const struct panfrost_device *dev,271const struct pan_blit_shader_data *blit_shader,272unsigned rt_count,273const struct pan_image_view **rts,274mali_ptr *blend_shaders,275const struct pan_image_view *z,276const struct pan_image_view *s,277void *out)278{279unsigned tex_count = 0;280bool zs = (z || s);281bool ms = false;282283for (unsigned i = 0; i < rt_count; i++) {284if (rts[i]) {285tex_count++;286if (rts[i]->nr_samples > 1)287ms = true;288}289}290291if (z) {292if (z->image->layout.nr_samples > 1)293ms = true;294tex_count++;295}296297if (s) {298if (s->image->layout.nr_samples > 1)299ms = true;300tex_count++;301}302303pan_pack(out, RENDERER_STATE, cfg) {304assert(blit_shader->address);305cfg.shader.shader = blit_shader->address;306cfg.shader.varying_count = 1;307cfg.shader.texture_count = tex_count;308cfg.shader.sampler_count = 1;309310cfg.properties.stencil_from_shader = s != NULL;311cfg.properties.depth_source =312z ?313MALI_DEPTH_SOURCE_SHADER :314MALI_DEPTH_SOURCE_FIXED_FUNCTION;315316cfg.multisample_misc.sample_mask = 0xFFFF;317cfg.multisample_misc.multisample_enable = ms;318cfg.multisample_misc.evaluate_per_sample = ms;319cfg.multisample_misc.depth_write_mask = z != NULL;320cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;321322cfg.stencil_mask_misc.stencil_enable = s != NULL;323cfg.stencil_mask_misc.stencil_mask_front = 0xFF;324cfg.stencil_mask_misc.stencil_mask_back = 0xFF;325cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;326cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;327cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;328cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;329cfg.stencil_front.mask = 0xFF;330cfg.stencil_back = cfg.stencil_front;331332if (pan_is_bifrost(dev)) {333pan_blitter_prepare_bifrost_rsd(dev, zs, ms, &cfg);334} else {335pan_blitter_prepare_midgard_rsd(dev, rts,336blend_shaders, zs,337&cfg);338}339}340341if (dev->quirks & MIDGARD_SFBD)342return;343344for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {345void *dest = out + MALI_RENDERER_STATE_LENGTH + MALI_BLEND_LENGTH * i;346const struct pan_image_view *rt_view = rts ? rts[i] : NULL;347mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;348349if (pan_is_bifrost(dev)) {350pan_blitter_emit_bifrost_blend(dev, i, rt_view, blit_shader,351blend_shader, dest);352} else {353pan_blitter_emit_midgard_blend(dev, i, rt_view,354blend_shader, dest);355}356}357}358359static void360pan_blitter_get_blend_shaders(struct panfrost_device *dev,361unsigned rt_count,362const struct pan_image_view **rts,363const struct pan_blit_shader_data *blit_shader,364mali_ptr *blend_shaders)365{366if (!rt_count)367return;368369struct pan_blend_state blend_state = {370.rt_count = rt_count,371};372373for (unsigned i = 0; i < rt_count; i++) {374if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)375continue;376377struct pan_blit_blend_shader_key key = {378.format = rts[i]->format,379.rt = i,380.nr_samples = rts[i]->image->layout.nr_samples,381.type = blit_shader->blend_types[i],382};383384pthread_mutex_lock(&dev->blitter.shaders.lock);385struct hash_entry *he =386_mesa_hash_table_search(dev->blitter.shaders.blend, &key);387struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;388if (blend_shader) {389blend_shaders[i] = blend_shader->address;390pthread_mutex_unlock(&dev->blitter.shaders.lock);391continue;392}393394blend_shader = rzalloc(dev->blitter.shaders.blend,395struct pan_blit_blend_shader_data);396blend_shader->key = key;397398blend_state.rts[i] = (struct pan_blend_rt_state) {399.format = rts[i]->format,400.nr_samples = rts[i]->image->layout.nr_samples,401.equation = {402.blend_enable = true,403.rgb_src_factor = BLEND_FACTOR_ZERO,404.rgb_invert_src_factor = true,405.rgb_dst_factor = BLEND_FACTOR_ZERO,406.rgb_func = BLEND_FUNC_ADD,407.alpha_src_factor = BLEND_FACTOR_ZERO,408.alpha_invert_src_factor = true,409.alpha_dst_factor = BLEND_FACTOR_ZERO,410.alpha_func = BLEND_FUNC_ADD,411.color_mask = 0xf,412},413};414415pthread_mutex_lock(&dev->blend_shaders.lock);416struct pan_blend_shader_variant *b =417pan_blend_get_shader_locked(dev, &blend_state,418blit_shader->blend_types[i],419nir_type_float32, /* unused */420i);421422ASSERTED unsigned full_threads =423(dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);424assert(b->work_reg_count <= full_threads);425struct panfrost_ptr bin =426pan_pool_alloc_aligned(dev->blitter.shaders.pool,427b->binary.size,428pan_is_bifrost(dev) ? 128 : 64);429memcpy(bin.cpu, b->binary.data, b->binary.size);430431blend_shader->address = bin.gpu | b->first_tag;432pthread_mutex_unlock(&dev->blend_shaders.lock);433_mesa_hash_table_insert(dev->blitter.shaders.blend,434&blend_shader->key, blend_shader);435pthread_mutex_unlock(&dev->blitter.shaders.lock);436blend_shaders[i] = blend_shader->address;437}438}439440static const struct pan_blit_shader_data *441pan_blitter_get_blit_shader(struct panfrost_device *dev,442const struct pan_blit_shader_key *key)443{444pthread_mutex_lock(&dev->blitter.shaders.lock);445struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);446struct pan_blit_shader_data *shader = he ? he->data : NULL;447448if (shader)449goto out;450451unsigned coord_comps = 0;452unsigned sig_offset = 0;453char sig[256];454bool first = true;455for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {456const char *type_str, *dim_str;457if (key->surfaces[i].type == nir_type_invalid)458continue;459460switch (key->surfaces[i].type) {461case nir_type_float32: type_str = "float"; break;462case nir_type_uint32: type_str = "uint"; break;463case nir_type_int32: type_str = "int"; break;464default: unreachable("Invalid type\n");465}466467switch (key->surfaces[i].dim) {468case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;469case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;470case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;471case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;472default: unreachable("Invalid dim\n");473}474475coord_comps = MAX2(coord_comps,476(key->surfaces[i].dim ? : 3) +477(key->surfaces[i].array ? 1 : 0));478first = false;479480if (sig_offset >= sizeof(sig))481continue;482483sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,484"%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",485first ? "" : ",",486gl_frag_result_name(key->surfaces[i].loc),487type_str, dim_str,488key->surfaces[i].array ? "[]" : "",489key->surfaces[i].src_samples,490key->surfaces[i].dst_samples);491}492493nir_builder b =494nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,495pan_shader_get_compiler_options(dev),496"pan_blit(%s)", sig);497b.shader->info.internal = true;498499nir_variable *coord_var =500nir_variable_create(b.shader, nir_var_shader_in,501glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),502"coord");503coord_var->data.location = VARYING_SLOT_TEX0;504505nir_ssa_def *coord = nir_load_var(&b, coord_var);506507unsigned active_count = 0;508for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {509if (key->surfaces[i].type == nir_type_invalid)510continue;511512/* Resolve operations only work for N -> 1 samples. */513assert(key->surfaces[i].dst_samples == 1 ||514key->surfaces[i].src_samples == key->surfaces[i].dst_samples);515516static const char *out_names[] = {517"out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",518};519520unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;521nir_variable *out =522nir_variable_create(b.shader, nir_var_shader_out,523glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),524out_names[active_count]);525out->data.location = key->surfaces[i].loc;526out->data.driver_location = active_count;527528bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;529bool ms = key->surfaces[i].src_samples > 1;530enum glsl_sampler_dim sampler_dim;531532switch (key->surfaces[i].dim) {533case MALI_TEXTURE_DIMENSION_1D:534sampler_dim = GLSL_SAMPLER_DIM_1D;535break;536case MALI_TEXTURE_DIMENSION_2D:537sampler_dim = ms ?538GLSL_SAMPLER_DIM_MS :539GLSL_SAMPLER_DIM_2D;540break;541case MALI_TEXTURE_DIMENSION_3D:542sampler_dim = GLSL_SAMPLER_DIM_3D;543break;544case MALI_TEXTURE_DIMENSION_CUBE:545sampler_dim = GLSL_SAMPLER_DIM_CUBE;546break;547}548549nir_ssa_def *res = NULL;550551if (resolve) {552/* When resolving a float type, we need to calculate553* the average of all samples. For integer resolve, GL554* and Vulkan say that one sample should be chosen555* without telling which. Let's just pick the first one556* in that case.557*/558nir_alu_type base_type =559nir_alu_type_get_base_type(key->surfaces[i].type);560unsigned nsamples = base_type == nir_type_float ?561key->surfaces[i].src_samples : 1;562563for (unsigned s = 0; s < nsamples; s++) {564nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);565566tex->op = nir_texop_txf_ms;567tex->dest_type = key->surfaces[i].type;568tex->texture_index = active_count;569tex->is_array = key->surfaces[i].array;570tex->sampler_dim = sampler_dim;571572tex->src[0].src_type = nir_tex_src_coord;573tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));574tex->coord_components = coord_comps;575576tex->src[1].src_type = nir_tex_src_ms_index;577tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));578579tex->src[2].src_type = nir_tex_src_lod;580tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));581nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);582nir_builder_instr_insert(&b, &tex->instr);583584res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;585}586587if (base_type == nir_type_float) {588unsigned type_sz =589nir_alu_type_get_type_size(key->surfaces[i].type);590res = nir_fmul(&b, res,591nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));592}593} else {594nir_tex_instr *tex =595nir_tex_instr_create(b.shader, ms ? 3 : 1);596597tex->dest_type = key->surfaces[i].type;598tex->texture_index = active_count;599tex->is_array = key->surfaces[i].array;600tex->sampler_dim = sampler_dim;601602if (ms) {603tex->op = nir_texop_txf_ms;604605tex->src[0].src_type = nir_tex_src_coord;606tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));607tex->coord_components = coord_comps;608609tex->src[1].src_type = nir_tex_src_ms_index;610tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));611612tex->src[2].src_type = nir_tex_src_lod;613tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));614} else {615tex->op = nir_texop_tex;616617tex->src[0].src_type = nir_tex_src_coord;618tex->src[0].src = nir_src_for_ssa(coord);619tex->coord_components = coord_comps;620}621622nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);623nir_builder_instr_insert(&b, &tex->instr);624res = &tex->dest.ssa;625}626627assert(res);628629if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {630nir_store_var(&b, out, res, 0xFF);631} else {632unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;633nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);634}635active_count++;636}637638struct panfrost_compile_inputs inputs = {639.gpu_id = dev->gpu_id,640.is_blit = true,641};642struct util_dynarray binary;643struct pan_shader_info info;644645util_dynarray_init(&binary, NULL);646647pan_shader_compile(dev, b.shader, &inputs, &binary, &info);648649shader = rzalloc(dev->blitter.shaders.blit,650struct pan_blit_shader_data);651shader->key = *key;652shader->address =653pan_pool_upload_aligned(dev->blitter.shaders.pool,654binary.data, binary.size,655pan_is_bifrost(dev) ? 128 : 64);656657util_dynarray_fini(&binary);658ralloc_free(b.shader);659660if (!pan_is_bifrost(dev))661shader->address |= info.midgard.first_tag;662663if (pan_is_bifrost(dev)) {664for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {665shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;666shader->blend_types[i] = info.bifrost.blend[i].type;667}668}669670_mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);671672out:673pthread_mutex_unlock(&dev->blitter.shaders.lock);674return shader;675}676677static mali_ptr678pan_blitter_get_rsd(struct panfrost_device *dev,679unsigned rt_count,680const struct pan_image_view **src_rts,681const struct pan_image_view **dst_rts,682const struct pan_image_view *src_z,683const struct pan_image_view *dst_z,684const struct pan_image_view *src_s,685const struct pan_image_view *dst_s)686{687struct pan_blit_rsd_key rsd_key = { 0 };688689assert(!rt_count || (!src_z && !src_s));690691struct pan_blit_shader_key blit_key = { 0 };692693if (src_z) {694assert(dst_z);695rsd_key.z.format = dst_z->format;696blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH;697rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32;698rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples;699rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples;700rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim;701rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer;702}703704if (src_s) {705assert(dst_s);706rsd_key.s.format = dst_s->format;707blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL;708rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32;709rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples;710rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples;711rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim;712rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer;713}714715for (unsigned i = 0; i < rt_count; i++) {716if (!src_rts[i])717continue;718719assert(dst_rts[i]);720rsd_key.rts[i].format = dst_rts[i]->format;721blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;722rsd_key.rts[i].type = blit_key.surfaces[i].type =723util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 :724util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 :725nir_type_float32;726rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples;727rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples;728rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim;729rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer;730}731732pthread_mutex_lock(&dev->blitter.rsds.lock);733struct hash_entry *he =734_mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);735struct pan_blit_rsd_data *rsd = he ? he->data : NULL;736if (rsd)737goto out;738739rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);740rsd->key = rsd_key;741742struct panfrost_ptr rsd_ptr =743(dev->quirks & MIDGARD_SFBD) ?744pan_pool_alloc_desc(dev->blitter.rsds.pool, RENDERER_STATE) :745pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,746PAN_DESC(RENDERER_STATE),747PAN_DESC_ARRAY(MAX2(rt_count, 1), BLEND));748749mali_ptr blend_shaders[8] = { 0 };750751const struct pan_blit_shader_data *blit_shader =752pan_blitter_get_blit_shader(dev, &blit_key);753754pan_blitter_get_blend_shaders(dev, rt_count, dst_rts,755blit_shader, blend_shaders);756757pan_blitter_emit_rsd(dev, blit_shader,758MAX2(rt_count, 1), dst_rts, blend_shaders,759dst_z, dst_s, rsd_ptr.cpu);760rsd->address = rsd_ptr.gpu;761_mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);762763out:764pthread_mutex_unlock(&dev->blitter.rsds.lock);765return rsd->address;766}767768static mali_ptr769pan_preload_get_rsd(struct panfrost_device *dev,770const struct pan_fb_info *fb,771bool zs)772{773const struct pan_image_view *rts[8] = { NULL };774const struct pan_image_view *z = NULL, *s = NULL;775struct pan_image_view patched_s_view;776unsigned rt_count = 0;777778if (zs) {779if (fb->zs.preload.z)780z = fb->zs.view.zs;781782if (fb->zs.preload.s) {783const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;784enum pipe_format fmt = util_format_get_depth_only(view->format);785786switch (view->format) {787case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;788case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;789default: fmt = view->format; break;790}791792if (fmt != view->format) {793patched_s_view = *view;794patched_s_view.format = fmt;795s = &patched_s_view;796} else {797s = view;798}799}800} else {801for (unsigned i = 0; i < fb->rt_count; i++) {802if (fb->rts[i].preload)803rts[i] = fb->rts[i].view;804}805806rt_count = fb->rt_count;807}808809return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s);810}811812static mali_ptr813pan_blit_get_rsd(struct panfrost_device *dev,814const struct pan_image_view *src_views,815const struct pan_image_view *dst_view)816{817const struct util_format_description *desc =818util_format_description(src_views[0].format);819const struct pan_image_view *src_rt = NULL, *dst_rt = NULL;820const struct pan_image_view *src_z = NULL, *dst_z = NULL;821const struct pan_image_view *src_s = NULL, *dst_s = NULL;822823if (util_format_has_depth(desc)) {824src_z = &src_views[0];825dst_z = dst_view;826}827828if (src_views[1].format) {829src_s = &src_views[1];830dst_s = dst_view;831} else if (util_format_has_stencil(desc)) {832src_s = &src_views[0];833dst_s = dst_view;834}835836if (!src_z && !src_s) {837src_rt = &src_views[0];838dst_rt = dst_view;839}840841return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt,842src_z, dst_z, src_s, dst_s);843}844845static bool846pan_preload_needed(const struct pan_fb_info *fb, bool zs)847{848if (zs) {849if (fb->zs.preload.z || fb->zs.preload.s)850return true;851} else {852for (unsigned i = 0; i < fb->rt_count; i++) {853if (fb->rts[i].preload)854return true;855}856}857858return false;859}860861static void862pan_blitter_emit_varying(struct pan_pool *pool,863mali_ptr coordinates,864struct MALI_DRAW *draw)865{866/* Bifrost needs an empty desc to mark end of prefetching */867bool padding_buffer = pan_is_bifrost(pool->dev);868869struct panfrost_ptr varying =870pan_pool_alloc_desc(pool, ATTRIBUTE);871struct panfrost_ptr varying_buffer =872pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),873ATTRIBUTE_BUFFER);874875pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {876cfg.pointer = coordinates;877cfg.stride = 4 * sizeof(float);878cfg.size = cfg.stride * 4;879}880881if (padding_buffer) {882pan_pack(varying_buffer.cpu + MALI_ATTRIBUTE_BUFFER_LENGTH,883ATTRIBUTE_BUFFER, cfg);884}885886pan_pack(varying.cpu, ATTRIBUTE, cfg) {887cfg.buffer_index = 0;888cfg.offset_enable = !pan_is_bifrost(pool->dev);889cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;890}891892draw->varyings = varying.gpu;893draw->varying_buffers = varying_buffer.gpu;894}895896static mali_ptr897pan_blitter_emit_bifrost_sampler(struct pan_pool *pool,898bool nearest_filter)899{900struct panfrost_ptr sampler =901pan_pool_alloc_desc(pool, BIFROST_SAMPLER);902903pan_pack(sampler.cpu, BIFROST_SAMPLER, cfg) {904cfg.seamless_cube_map = false;905cfg.normalized_coordinates = false;906cfg.point_sample_minify = nearest_filter;907cfg.point_sample_magnify = nearest_filter;908}909910return sampler.gpu;911}912913static mali_ptr914pan_blitter_emit_midgard_sampler(struct pan_pool *pool,915bool nearest_filter)916{917struct panfrost_ptr sampler =918pan_pool_alloc_desc(pool, MIDGARD_SAMPLER);919920pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg) {921cfg.normalized_coordinates = false;922cfg.magnify_nearest = nearest_filter;923cfg.minify_nearest = nearest_filter;924}925926return sampler.gpu;927}928929static mali_ptr930pan_blitter_emit_bifrost_textures(struct pan_pool *pool,931unsigned tex_count,932const struct pan_image_view **views)933{934struct panfrost_ptr textures =935pan_pool_alloc_desc_array(pool, tex_count, BIFROST_TEXTURE);936937for (unsigned i = 0; i < tex_count; i++) {938void *texture = textures.cpu + (MALI_BIFROST_TEXTURE_LENGTH * i);939size_t payload_size =940panfrost_estimate_texture_payload_size(pool->dev, views[i]);941struct panfrost_ptr surfaces =942pan_pool_alloc_aligned(pool, payload_size,943MALI_SURFACE_WITH_STRIDE_ALIGN);944945panfrost_new_texture(pool->dev, views[i], texture, &surfaces);946}947948return textures.gpu;949}950951static mali_ptr952pan_blitter_emit_midgard_textures(struct pan_pool *pool,953unsigned tex_count,954const struct pan_image_view **views)955{956mali_ptr textures[8] = { 0 };957958for (unsigned i = 0; i < tex_count; i++) {959size_t sz = MALI_MIDGARD_TEXTURE_LENGTH +960panfrost_estimate_texture_payload_size(pool->dev, views[i]);961struct panfrost_ptr texture =962pan_pool_alloc_aligned(pool, sz, MALI_MIDGARD_TEXTURE_ALIGN);963struct panfrost_ptr surfaces = {964.cpu = texture.cpu + MALI_MIDGARD_TEXTURE_LENGTH,965.gpu = texture.gpu + MALI_MIDGARD_TEXTURE_LENGTH,966};967968panfrost_new_texture(pool->dev, views[i], texture.cpu, &surfaces);969textures[i] = texture.gpu;970}971972return pan_pool_upload_aligned(pool, textures,973tex_count * sizeof(mali_ptr),974sizeof(mali_ptr));975}976977static void978pan_preload_emit_textures(struct pan_pool *pool,979const struct pan_fb_info *fb, bool zs,980struct MALI_DRAW *draw)981{982const struct pan_image_view *views[8];983struct pan_image_view patched_s_view;984unsigned tex_count = 0;985986if (zs) {987if (fb->zs.preload.z)988views[tex_count++] = fb->zs.view.zs;989990if (fb->zs.preload.s) {991const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;992enum pipe_format fmt = util_format_get_depth_only(view->format);993994switch (view->format) {995case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;996case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;997default: fmt = view->format; break;998}9991000if (fmt != view->format) {1001patched_s_view = *view;1002patched_s_view.format = fmt;1003view = &patched_s_view;1004}1005views[tex_count++] = view;1006}1007} else {1008for (unsigned i = 0; i < fb->rt_count; i++) {1009if (fb->rts[i].preload)1010views[tex_count++] = fb->rts[i].view;1011}10121013}10141015if (pan_is_bifrost(pool->dev))1016draw->textures = pan_blitter_emit_bifrost_textures(pool, tex_count, views);1017else1018draw->textures = pan_blitter_emit_midgard_textures(pool, tex_count, views);1019}10201021static mali_ptr1022pan_blitter_emit_viewport(struct pan_pool *pool,1023uint16_t minx, uint16_t miny,1024uint16_t maxx, uint16_t maxy)1025{1026struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);10271028pan_pack(vp.cpu, VIEWPORT, cfg) {1029cfg.scissor_minimum_x = minx;1030cfg.scissor_minimum_y = miny;1031cfg.scissor_maximum_x = maxx;1032cfg.scissor_maximum_y = maxy;1033}10341035return vp.gpu;1036}10371038static void1039pan_preload_emit_dcd(struct pan_pool *pool,1040struct pan_fb_info *fb, bool zs,1041mali_ptr coordinates,1042mali_ptr tsd, mali_ptr rsd,1043void *out, bool always_write)1044{1045pan_pack(out, DRAW, cfg) {1046cfg.four_components_per_vertex = true;1047cfg.draw_descriptor_is_64b = true;1048cfg.thread_storage = tsd;1049cfg.state = rsd;10501051cfg.position = coordinates;1052pan_blitter_emit_varying(pool, coordinates, &cfg);1053uint16_t minx = 0, miny = 0, maxx, maxy;1054if (pool->dev->quirks & MIDGARD_SFBD) {1055maxx = fb->width - 1;1056maxy = fb->height - 1;1057} else {1058/* Align on 32x32 tiles */1059minx = fb->extent.minx & ~31;1060miny = fb->extent.miny & ~31;1061maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;1062maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;1063}10641065cfg.viewport =1066pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);10671068pan_preload_emit_textures(pool, fb, zs, &cfg);10691070if (pan_is_bifrost(pool->dev)) {1071cfg.samplers = pan_blitter_emit_bifrost_sampler(pool, true);10721073/* Tiles updated by blit shaders are still considered1074* clean (separate for colour and Z/S), allowing us to1075* suppress unnecessary writeback */1076cfg.clean_fragment_write = !always_write;1077} else {1078cfg.samplers = pan_blitter_emit_midgard_sampler(pool, true);1079cfg.texture_descriptor_is_64b = true;1080}1081}1082}10831084static void1085pan_blit_emit_dcd(struct pan_pool *pool,1086mali_ptr src_coords, mali_ptr dst_coords,1087mali_ptr textures, mali_ptr samplers,1088mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,1089void *out)1090{1091pan_pack(out, DRAW, cfg) {1092cfg.four_components_per_vertex = true;1093cfg.draw_descriptor_is_64b = true;1094cfg.thread_storage = tsd;1095cfg.state = rsd;10961097cfg.position = dst_coords;1098pan_blitter_emit_varying(pool, src_coords, &cfg);1099cfg.viewport = vpd;1100cfg.texture_descriptor_is_64b = !pan_is_bifrost(pool->dev);1101cfg.textures = textures;1102cfg.samplers = samplers;1103}1104}11051106static void1107pan_preload_fb_bifrost_alloc_pre_post_dcds(struct pan_pool *desc_pool,1108struct pan_fb_info *fb)1109{1110assert(pan_is_bifrost(desc_pool->dev));11111112if (fb->bifrost.pre_post.dcds.gpu)1113return;11141115fb->bifrost.pre_post.dcds =1116pan_pool_alloc_desc_aggregate(desc_pool,1117PAN_DESC(DRAW),1118PAN_DESC(DRAW_PADDING),1119PAN_DESC(DRAW),1120PAN_DESC(DRAW_PADDING),1121PAN_DESC(DRAW),1122PAN_DESC(DRAW_PADDING));1123}11241125static void1126pan_preload_emit_midgard_tiler_job(struct pan_pool *desc_pool,1127struct pan_scoreboard *scoreboard,1128struct pan_fb_info *fb, bool zs,1129mali_ptr coords, mali_ptr rsd, mali_ptr tsd)1130{1131struct panfrost_ptr job =1132pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB);11331134pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd,1135pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW),1136false);11371138pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {1139cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;1140cfg.index_count = 4;1141cfg.job_task_split = 6;1142}11431144pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {1145cfg.constant = 1.0f;1146}11471148void *invoc = pan_section_ptr(job.cpu,1149MIDGARD_TILER_JOB,1150INVOCATION);1151panfrost_pack_work_groups_compute(invoc, 1, 4,11521, 1, 1, 1, true, false);11531154panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,1155false, false, 0, 0, &job, true);1156}11571158static void1159pan_blit_emit_midgard_tiler_job(struct pan_pool *desc_pool,1160struct pan_scoreboard *scoreboard,1161mali_ptr src_coords, mali_ptr dst_coords,1162mali_ptr textures, mali_ptr samplers,1163mali_ptr vpd, mali_ptr rsd, mali_ptr tsd)1164{1165struct panfrost_ptr job =1166pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB);11671168pan_blit_emit_dcd(desc_pool,1169src_coords, dst_coords, textures, samplers,1170vpd, tsd, rsd,1171pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW));11721173pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {1174cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;1175cfg.index_count = 4;1176cfg.job_task_split = 6;1177}11781179pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) {1180cfg.constant = 1.0f;1181}11821183void *invoc = pan_section_ptr(job.cpu,1184MIDGARD_TILER_JOB,1185INVOCATION);1186panfrost_pack_work_groups_compute(invoc, 1, 4,11871, 1, 1, 1, true, false);11881189panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,1190false, false, 0, 0, &job, false);1191}11921193static void1194pan_blit_emit_bifrost_tiler_job(struct pan_pool *desc_pool,1195struct pan_scoreboard *scoreboard,1196mali_ptr src_coords, mali_ptr dst_coords,1197mali_ptr textures, mali_ptr samplers,1198mali_ptr vpd, mali_ptr rsd,1199mali_ptr tsd, mali_ptr tiler)1200{1201struct panfrost_ptr job =1202pan_pool_alloc_desc(desc_pool, BIFROST_TILER_JOB);12031204pan_blit_emit_dcd(desc_pool,1205src_coords, dst_coords, textures, samplers,1206vpd, tsd, rsd,1207pan_section_ptr(job.cpu, BIFROST_TILER_JOB, DRAW));12081209pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE, cfg) {1210cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;1211cfg.index_count = 4;1212cfg.job_task_split = 6;1213}12141215pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE_SIZE, cfg) {1216cfg.constant = 1.0f;1217}12181219void *invoc = pan_section_ptr(job.cpu,1220BIFROST_TILER_JOB,1221INVOCATION);1222panfrost_pack_work_groups_compute(invoc, 1, 4,12231, 1, 1, 1, true, false);12241225pan_section_pack(job.cpu, BIFROST_TILER_JOB, PADDING, cfg);1226pan_section_pack(job.cpu, BIFROST_TILER_JOB, TILER, cfg) {1227cfg.address = tiler;1228}12291230panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,1231false, false, 0, 0, &job, false);1232}12331234static void1235pan_preload_emit_bifrost_pre_frame_dcd(struct pan_pool *desc_pool,1236struct pan_fb_info *fb, bool zs,1237mali_ptr coords, mali_ptr rsd,1238mali_ptr tsd)1239{1240struct panfrost_device *dev = desc_pool->dev;12411242unsigned dcd_idx = zs ? 0 : 1;1243pan_preload_fb_bifrost_alloc_pre_post_dcds(desc_pool, fb);1244assert(fb->bifrost.pre_post.dcds.cpu);1245void *dcd = fb->bifrost.pre_post.dcds.cpu +1246(dcd_idx * (MALI_DRAW_LENGTH + MALI_DRAW_PADDING_LENGTH));12471248int crc_rt = pan_select_crc_rt(dev, fb);12491250bool always_write = false;12511252/* If CRC data is currently invalid and this batch will make it valid,1253* write even clean tiles to make sure CRC data is updated. */1254if (crc_rt >= 0) {1255bool *valid = fb->rts[crc_rt].crc_valid;1256bool full = !fb->extent.minx && !fb->extent.miny &&1257fb->extent.maxx == (fb->width - 1) &&1258fb->extent.maxy == (fb->height - 1);12591260if (full && !(*valid))1261always_write = true;1262}12631264pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write);1265if (zs) {1266enum pipe_format fmt = fb->zs.view.zs->image->layout.format;1267bool always = false;12681269/* If we're dealing with a combined ZS resource and only one1270* component is cleared, we need to reload the whole surface1271* because the zs_clean_pixel_write_enable flag is set in that1272* case.1273*/1274if (util_format_is_depth_and_stencil(fmt) &&1275fb->zs.clear.z != fb->zs.clear.s)1276always = true;12771278/* We could use INTERSECT on Bifrost v7 too, but1279* EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile1280* buffer one or more tiles ahead, making ZS data immediately1281* available for any ZS tests taking place in other shaders.1282* Thing's haven't been benchmarked to determine what's1283* preferable (saving bandwidth vs having ZS preloaded1284* earlier), so let's leave it like that for now.1285*/1286fb->bifrost.pre_post.modes[dcd_idx] =1287desc_pool->dev->arch > 6 ?1288MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :1289always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :1290MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;1291} else {1292fb->bifrost.pre_post.modes[dcd_idx] =1293always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :1294MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;1295}1296}12971298static void1299pan_preload_fb_part(struct pan_pool *pool,1300struct pan_scoreboard *scoreboard,1301struct pan_fb_info *fb, bool zs,1302mali_ptr coords, mali_ptr tsd, mali_ptr tiler)1303{1304struct panfrost_device *dev = pool->dev;1305mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs);13061307if (pan_is_bifrost(dev)) {1308pan_preload_emit_bifrost_pre_frame_dcd(pool, fb, zs,1309coords, rsd, tsd);1310} else {1311pan_preload_emit_midgard_tiler_job(pool, scoreboard,1312fb, zs, coords, rsd, tsd);1313}1314}13151316void1317pan_preload_fb(struct pan_pool *pool,1318struct pan_scoreboard *scoreboard,1319struct pan_fb_info *fb,1320mali_ptr tsd, mali_ptr tiler)1321{1322bool preload_zs = pan_preload_needed(fb, true);1323bool preload_rts = pan_preload_needed(fb, false);1324mali_ptr coords;13251326if (!preload_zs && !preload_rts)1327return;13281329float rect[] = {13300.0, 0.0, 0.0, 1.0,1331fb->width, 0.0, 0.0, 1.0,13320.0, fb->height, 0.0, 1.0,1333fb->width, fb->height, 0.0, 1.0,1334};13351336coords = pan_pool_upload_aligned(pool, rect,1337sizeof(rect), 64);13381339if (preload_zs)1340pan_preload_fb_part(pool, scoreboard, fb, true, coords,1341tsd, tiler);13421343if (preload_rts)1344pan_preload_fb_part(pool, scoreboard, fb, false, coords,1345tsd, tiler);1346}13471348void1349pan_blit_ctx_init(struct panfrost_device *dev,1350const struct pan_blit_info *info,1351struct pan_pool *blit_pool,1352struct pan_blit_context *ctx)1353{1354memset(ctx, 0, sizeof(*ctx));13551356ctx->z_scale = (float)(info->dst.end.z - info->dst.start.z + 1) /1357(info->src.end.z - info->src.start.z + 1);13581359struct pan_image_view sviews[2] = {1360{1361.format = info->src.planes[0].format,1362.image = info->src.planes[0].image,1363.dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?1364MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,1365.first_level = info->src.level,1366.last_level = info->src.level,1367.first_layer = info->src.start.layer,1368.last_layer = info->src.end.layer,1369.swizzle = {1370PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,1371PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,1372},1373},1374};13751376struct pan_image_view dview = {1377.format = info->dst.planes[0].format,1378.image = info->dst.planes[0].image,1379.dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?1380MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,1381.first_level = info->dst.level,1382.last_level = info->dst.level,1383.first_layer = info->dst.start.layer,1384.last_layer = info->dst.start.layer,1385.swizzle = {1386PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,1387PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,1388},1389};13901391ctx->src.start.x = info->src.start.x;1392ctx->src.start.y = info->src.start.y;1393ctx->src.end.x = info->src.end.x;1394ctx->src.end.y = info->src.end.y;1395ctx->src.dim = sviews[0].dim;1396if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D)1397ctx->src.z_offset = info->src.start.z;1398else1399ctx->src.layer_offset = info->src.start.layer;14001401if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {1402ctx->dst.layer_offset = info->dst.start.z;1403ctx->dst.cur_layer = info->dst.start.z;1404ctx->dst.last_layer = info->dst.end.z;1405} else {1406ctx->dst.layer_offset = info->dst.start.layer;1407ctx->dst.cur_layer = info->dst.start.layer;1408ctx->dst.last_layer = info->dst.end.layer;1409}14101411/* Split depth and stencil */1412if (util_format_is_depth_and_stencil(sviews[0].format)) {1413sviews[1] = sviews[0];1414sviews[0].format = util_format_get_depth_only(sviews[0].format);1415sviews[1].format = util_format_stencil_only(sviews[1].format);1416} else if (info->src.planes[1].format) {1417sviews[1] = sviews[0];1418sviews[1].format = info->src.planes[1].format;1419sviews[1].image = info->src.planes[1].image;1420}14211422ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);14231424ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;14251426assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));14271428unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);1429unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);1430unsigned minx = MAX2(info->dst.start.x, 0);1431unsigned miny = MAX2(info->dst.start.y, 0);1432unsigned maxx = MIN2(info->dst.end.x, dst_w - 1);1433unsigned maxy = MIN2(info->dst.end.y, dst_h - 1);14341435if (info->scissor.enable) {1436minx = MAX2(minx, info->scissor.minx);1437miny = MAX2(miny, info->scissor.miny);1438maxx = MIN2(maxx, info->scissor.maxx);1439maxy = MIN2(maxy, info->scissor.maxy);1440}14411442const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };1443unsigned nviews = sviews[1].format ? 2 : 1;14441445if (pan_is_bifrost(dev)) {1446ctx->textures =1447pan_blitter_emit_bifrost_textures(blit_pool, nviews, sview_ptrs);1448ctx->samplers =1449pan_blitter_emit_bifrost_sampler(blit_pool, info->nearest);1450} else {1451ctx->textures =1452pan_blitter_emit_midgard_textures(blit_pool, nviews, sview_ptrs);1453ctx->samplers =1454pan_blitter_emit_midgard_sampler(blit_pool, info->nearest);1455}14561457ctx->vpd = pan_blitter_emit_viewport(blit_pool,1458minx, miny, maxx, maxy);14591460float dst_rect[] = {1461info->dst.start.x, info->dst.start.y, 0.0, 1.0,1462info->dst.end.x + 1, info->dst.start.y, 0.0, 1.0,1463info->dst.start.x, info->dst.end.y + 1, 0.0, 1.0,1464info->dst.end.x + 1, info->dst.end.y + 1, 0.0, 1.0,1465};14661467ctx->position =1468pan_pool_upload_aligned(blit_pool, dst_rect,1469sizeof(dst_rect), 64);1470}14711472bool1473pan_blit_next_surface(struct pan_blit_context *ctx)1474{1475if (ctx->dst.cur_layer >= ctx->dst.last_layer)1476return false;14771478ctx->dst.cur_layer++;1479return true;1480}14811482void1483pan_blit(struct pan_blit_context *ctx,1484struct pan_pool *pool,1485struct pan_scoreboard *scoreboard,1486mali_ptr tsd, mali_ptr tiler)1487{1488if (ctx->dst.cur_layer < 0 || ctx->dst.cur_layer > ctx->dst.last_layer)1489return;14901491int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;1492float src_z;1493if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)1494src_z = (ctx->z_scale * layer) + ctx->src.z_offset;1495else1496src_z = ctx->src.layer_offset + layer;14971498float src_rect[] = {1499ctx->src.start.x, ctx->src.start.y, src_z, 1.0,1500ctx->src.end.x + 1, ctx->src.start.y, src_z, 1.0,1501ctx->src.start.x, ctx->src.end.y + 1, src_z, 1.0,1502ctx->src.end.x + 1, ctx->src.end.y + 1, src_z, 1.0,1503};15041505mali_ptr src_coords =1506pan_pool_upload_aligned(pool, src_rect,1507sizeof(src_rect), 64);15081509if (pan_is_bifrost(pool->dev)) {1510pan_blit_emit_bifrost_tiler_job(pool, scoreboard,1511src_coords, ctx->position,1512ctx->textures, ctx->samplers,1513ctx->vpd, ctx->rsd, tsd, tiler);1514} else {1515pan_blit_emit_midgard_tiler_job(pool, scoreboard,1516src_coords, ctx->position,1517ctx->textures, ctx->samplers,1518ctx->vpd, ctx->rsd, tsd);1519}1520}15211522static uint32_t pan_blit_shader_key_hash(const void *key)1523{1524return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));1525}15261527static bool pan_blit_shader_key_equal(const void *a, const void *b)1528{1529return !memcmp(a, b, sizeof(struct pan_blit_shader_key));1530}15311532static uint32_t pan_blit_blend_shader_key_hash(const void *key)1533{1534return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));1535}15361537static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)1538{1539return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));1540}15411542static uint32_t pan_blit_rsd_key_hash(const void *key)1543{1544return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));1545}15461547static bool pan_blit_rsd_key_equal(const void *a, const void *b)1548{1549return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));1550}15511552static void1553pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)1554{1555static const struct pan_blit_shader_key prefill[] = {1556{1557.surfaces[0] = {1558.loc = FRAG_RESULT_DEPTH,1559.type = nir_type_float32,1560.dim = MALI_TEXTURE_DIMENSION_2D,1561.src_samples = 1,1562.dst_samples = 1,1563},1564},1565{1566.surfaces[1] = {1567.loc = FRAG_RESULT_STENCIL,1568.type = nir_type_uint32,1569.dim = MALI_TEXTURE_DIMENSION_2D,1570.src_samples = 1,1571.dst_samples = 1,1572},1573},1574{1575.surfaces[0] = {1576.loc = FRAG_RESULT_DATA0,1577.type = nir_type_float32,1578.dim = MALI_TEXTURE_DIMENSION_2D,1579.src_samples = 1,1580.dst_samples = 1,1581},1582},1583};15841585for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)1586pan_blitter_get_blit_shader(dev, &prefill[i]);1587}15881589void1590pan_blitter_init(struct panfrost_device *dev,1591struct pan_pool *bin_pool,1592struct pan_pool *desc_pool)1593{1594dev->blitter.shaders.blit =1595_mesa_hash_table_create(NULL, pan_blit_shader_key_hash,1596pan_blit_shader_key_equal);1597dev->blitter.shaders.blend =1598_mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,1599pan_blit_blend_shader_key_equal);1600dev->blitter.shaders.pool = bin_pool;1601pthread_mutex_init(&dev->blitter.shaders.lock, NULL);1602pan_blitter_prefill_blit_shader_cache(dev);16031604dev->blitter.rsds.pool = desc_pool;1605dev->blitter.rsds.rsds =1606_mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,1607pan_blit_rsd_key_equal);1608pthread_mutex_init(&dev->blitter.rsds.lock, NULL);1609}16101611void1612pan_blitter_cleanup(struct panfrost_device *dev)1613{1614_mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);1615_mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);1616pthread_mutex_destroy(&dev->blitter.shaders.lock);1617_mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);1618pthread_mutex_destroy(&dev->blitter.rsds.lock);1619}162016211622