Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "util/u_viewport.h"

#include "common/freedreno_guardband.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blend.h"
#include "fd6_const.h"
#include "fd6_context.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_image.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_rasterizer.h"
#include "fd6_texture.h"
#include "fd6_zsa.h"

/* Border color layout is diff from a4xx/a5xx.. if later gens turn out to
 * share the a6xx layout then move this somewhere common ;-)
 *
 * Entry layout looks like (total size 0x80 bytes):
 */

struct PACKED bcolor_entry {
   uint32_t fp32[4];
   uint16_t ui16[4];
   int16_t si16[4];
   uint16_t fp16[4];
   uint16_t rgb565;
   uint16_t rgb5a1;
   uint16_t rgba4;
   uint8_t __pad0[2];
   uint8_t ui8[4];
   int8_t si8[4];
   uint32_t rgb10a2;
   uint32_t z24; /* also s8? */
   uint16_t
      srgb[4]; /* appears to duplicate fp16[], but clamped, used for srgb */
   uint8_t __pad1[56];
};

#define FD6_BORDER_COLOR_SIZE sizeof(struct bcolor_entry)
#define FD6_BORDER_COLOR_UPLOAD_SIZE                                           \
   (2 * PIPE_MAX_SAMPLERS * FD6_BORDER_COLOR_SIZE)
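/* Note: the upload buffer holds one bcolor_entry per sampler for two
 * shader stages (VS and FS, see emit_border_color() below), hence the
 * 2x in the upload size.
 */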
static void
setup_border_colors(struct fd_texture_stateobj *tex,
                    struct bcolor_entry *entries)
{
   unsigned i, j;
   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);

   for (i = 0; i < tex->num_samplers; i++) {
      struct bcolor_entry *e = &entries[i];
      struct pipe_sampler_state *sampler = tex->samplers[i];
      union pipe_color_union *bc;

      if (!sampler)
         continue;

      bc = &sampler->border_color;

      /*
       * XXX HACK ALERT XXX
       *
       * The border colors need to be swizzled in a particular
       * format-dependent order. Even though samplers don't know about
       * formats, we can assume that with a GL state tracker, there's a
       * 1:1 correspondence between sampler and texture. Take advantage
       * of that knowledge.
       */
      if ((i >= tex->num_textures) || !tex->textures[i])
         continue;

      struct pipe_sampler_view *view = tex->textures[i];
      enum pipe_format format = view->format;
      const struct util_format_description *desc =
         util_format_description(format);

      e->rgb565 = 0;
      e->rgb5a1 = 0;
      e->rgba4 = 0;
      e->rgb10a2 = 0;
      e->z24 = 0;

      unsigned char swiz[4];

      fd6_tex_swiz(format, swiz, view->swizzle_r, view->swizzle_g,
                   view->swizzle_b, view->swizzle_a);

      for (j = 0; j < 4; j++) {
         int c = swiz[j];
         int cd = c;

         /*
          * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the
          * stencil border color value in bc->ui[0] but according
          * to desc->swizzle and desc->channel, the .x/.w component
          * is NONE and the stencil value is in the y component.
          * Meanwhile the hardware wants this in the .w component
          * for x24s8 and the .x component for x32_s8x24.
          */
         if ((format == PIPE_FORMAT_X24S8_UINT) ||
             (format == PIPE_FORMAT_X32_S8X24_UINT)) {
            if (j == 0) {
               c = 1;
               cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3;
            } else {
               continue;
            }
         }

         if (c >= 4)
            continue;

         if (desc->channel[c].pure_integer) {
            uint16_t clamped;
            switch (desc->channel[c].size) {
            case 2:
               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
               clamped = CLAMP(bc->ui[j], 0, 0x3);
               break;
            case 8:
               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
                  clamped = CLAMP(bc->i[j], -128, 127);
               else
                  clamped = CLAMP(bc->ui[j], 0, 255);
               break;
            case 10:
               assert(desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED);
               clamped = CLAMP(bc->ui[j], 0, 0x3ff);
               break;
            case 16:
               if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
                  clamped = CLAMP(bc->i[j], -32768, 32767);
               else
                  clamped = CLAMP(bc->ui[j], 0, 65535);
               break;
            default:
               assert(!"Unexpected bit size");
            case 32:
               clamped = 0;
               break;
            }
            e->fp32[cd] = bc->ui[j];
            e->fp16[cd] = clamped;
         } else {
            float f = bc->f[j];
            float f_u = CLAMP(f, 0, 1);
            float f_s = CLAMP(f, -1, 1);

            e->fp32[c] = fui(f);
            e->fp16[c] = _mesa_float_to_half(f);
            e->srgb[c] = _mesa_float_to_half(f_u);
            e->ui16[c] = f_u * 0xffff;
            e->si16[c] = f_s * 0x7fff;
            e->ui8[c] = f_u * 0xff;
            e->si8[c] = f_s * 0x7f;
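            /* Accumulate the small packed formats too: each component
             * is quantized to its field width and shifted into place,
             * so the entry carries a ready-made border color for every
             * representation the hw may sample.
             */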
            if (c == 1)
               e->rgb565 |= (int)(f_u * 0x3f) << 5;
            else if (c < 3)
               e->rgb565 |= (int)(f_u * 0x1f) << (c ? 11 : 0);
            if (c == 3)
               e->rgb5a1 |= (f_u > 0.5) ? 0x8000 : 0;
            else
               e->rgb5a1 |= (int)(f_u * 0x1f) << (c * 5);
            if (c == 3)
               e->rgb10a2 |= (int)(f_u * 0x3) << 30;
            else
               e->rgb10a2 |= (int)(f_u * 0x3ff) << (c * 10);
            e->rgba4 |= (int)(f_u * 0xf) << (c * 4);
            if (c == 0)
               e->z24 = f_u * 0xffffff;
         }
      }

#ifdef DEBUG
      memset(&e->__pad0, 0, sizeof(e->__pad0));
      memset(&e->__pad1, 0, sizeof(e->__pad1));
#endif
   }
}

static void
emit_border_color(struct fd_context *ctx, struct fd_ringbuffer *ring) assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct bcolor_entry *entries;
   unsigned off;
   void *ptr;

   STATIC_ASSERT(sizeof(struct bcolor_entry) == FD6_BORDER_COLOR_SIZE);

   u_upload_alloc(fd6_ctx->border_color_uploader, 0,
                  FD6_BORDER_COLOR_UPLOAD_SIZE, FD6_BORDER_COLOR_UPLOAD_SIZE,
                  &off, &fd6_ctx->border_color_buf, &ptr);

   entries = ptr;

   setup_border_colors(&ctx->tex[PIPE_SHADER_VERTEX], &entries[0]);
   setup_border_colors(&ctx->tex[PIPE_SHADER_FRAGMENT],
                       &entries[ctx->tex[PIPE_SHADER_VERTEX].num_samplers]);

   OUT_PKT4(ring, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR, 2);
   OUT_RELOC(ring, fd_resource(fd6_ctx->border_color_buf)->bo, off, 0, 0);

   u_upload_unmap(fd6_ctx->border_color_uploader);
}

static void
fd6_emit_fb_tex(struct fd_ringbuffer *state, struct fd_context *ctx) assert_dt
{
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   struct fd_resource *rsc = fd_resource(psurf->texture);

   OUT_RINGP(state, 0, &ctx->batch->fb_read_patches); /* texconst0, patched in gmem emit */
   OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(pfb->width) |
                      A6XX_TEX_CONST_1_HEIGHT(pfb->height));
   OUT_RING(state, 0); /* texconst2, patched in gmem emit */
   OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size));
   OUT_RING(state, 0); /* BASE_LO, patched in gmem emit */
   OUT_RING(state, 0); /* BASE_HI, patched in gmem emit */
   OUT_RING(state, 0); /* texconst6 */
   OUT_RING(state, 0); /* texconst7 */
   OUT_RING(state, 0); /* texconst8 */
   OUT_RING(state, 0); /* texconst9 */
   OUT_RING(state, 0); /* texconst10 */
   OUT_RING(state, 0); /* texconst11 */
   OUT_RING(state, 0);
   OUT_RING(state, 0);
   OUT_RING(state, 0);
   OUT_RING(state, 0);
}

bool
fd6_emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
                  enum pipe_shader_type type, struct fd_texture_stateobj *tex,
                  unsigned bcolor_offset,
                  /* can be NULL if no image/SSBO/fb state to merge in: */
                  const struct ir3_shader_variant *v)
{
   bool needs_border = false;
   unsigned opcode, tex_samp_reg, tex_const_reg, tex_count_reg;
   enum a6xx_state_block sb;

   switch (type) {
   case PIPE_SHADER_VERTEX:
      sb = SB6_VS_TEX;
      opcode = CP_LOAD_STATE6_GEOM;
      tex_samp_reg = REG_A6XX_SP_VS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_VS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_VS_TEX_COUNT;
      break;
   case PIPE_SHADER_TESS_CTRL:
      sb = SB6_HS_TEX;
      opcode = CP_LOAD_STATE6_GEOM;
      tex_samp_reg = REG_A6XX_SP_HS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_HS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_HS_TEX_COUNT;
      break;
   case PIPE_SHADER_TESS_EVAL:
      sb = SB6_DS_TEX;
      opcode = CP_LOAD_STATE6_GEOM;
      tex_samp_reg = REG_A6XX_SP_DS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_DS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_DS_TEX_COUNT;
      break;
   case PIPE_SHADER_GEOMETRY:
      sb = SB6_GS_TEX;
      opcode = CP_LOAD_STATE6_GEOM;
      tex_samp_reg = REG_A6XX_SP_GS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_GS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_GS_TEX_COUNT;
      break;
   case PIPE_SHADER_FRAGMENT:
      sb = SB6_FS_TEX;
      opcode = CP_LOAD_STATE6_FRAG;
      tex_samp_reg = REG_A6XX_SP_FS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_FS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_FS_TEX_COUNT;
      break;
   case PIPE_SHADER_COMPUTE:
      sb = SB6_CS_TEX;
      opcode = CP_LOAD_STATE6_FRAG;
      tex_samp_reg = REG_A6XX_SP_CS_TEX_SAMP;
      tex_const_reg = REG_A6XX_SP_CS_TEX_CONST;
      tex_count_reg = REG_A6XX_SP_CS_TEX_COUNT;
      break;
   default:
      unreachable("bad state block");
   }
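   /* Note that each indirect state object built below is referenced
    * twice: once by the CP_LOAD_STATE6 packet and once by the
    * SP_xS_TEX_SAMP/SP_xS_TEX_CONST pointer registers selected above.
    */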
   if (tex->num_samplers > 0) {
      struct fd_ringbuffer *state =
         fd_ringbuffer_new_object(ctx->pipe, tex->num_samplers * 4 * 4);
      for (unsigned i = 0; i < tex->num_samplers; i++) {
         static const struct fd6_sampler_stateobj dummy_sampler = {};
         const struct fd6_sampler_stateobj *sampler =
            tex->samplers[i] ? fd6_sampler_stateobj(tex->samplers[i])
                             : &dummy_sampler;
         OUT_RING(state, sampler->texsamp0);
         OUT_RING(state, sampler->texsamp1);
         OUT_RING(state, sampler->texsamp2 |
                            A6XX_TEX_SAMP_2_BCOLOR(i + bcolor_offset));
         OUT_RING(state, sampler->texsamp3);
         needs_border |= sampler->needs_border;
      }

      /* output sampler state: */
      OUT_PKT7(ring, opcode, 3);
      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                        CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE6_0_NUM_UNIT(tex->num_samplers));
      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

      OUT_PKT4(ring, tex_samp_reg, 2);
      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

      fd_ringbuffer_del(state);
   }

   unsigned num_merged_textures = tex->num_textures;
   unsigned num_textures = tex->num_textures;
   if (v) {
      num_merged_textures += v->image_mapping.num_tex;

      if (v->fb_read)
         num_merged_textures++;

      /* There could be more bound textures than what the shader uses,
       * which isn't known at shader compile time.  So in the case we
       * are merging tex state, only emit the textures that the shader
       * uses (since the image/SSBO related tex state comes immediately
       * after)
       */
      num_textures = v->image_mapping.tex_base;
   }
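   /* Each texture descriptor below is 16 dwords; the merged state object
    * is laid out as [shader textures][image/SSBO tex descriptors][fb-read],
    * matching the ir3 image_mapping set up at compile time.
    */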
   if (num_merged_textures > 0) {
      struct fd_ringbuffer *state =
         fd_ringbuffer_new_object(ctx->pipe, num_merged_textures * 16 * 4);
      for (unsigned i = 0; i < num_textures; i++) {
         const struct fd6_pipe_sampler_view *view;

         if (tex->textures[i]) {
            view = fd6_pipe_sampler_view(tex->textures[i]);
            if (unlikely(view->rsc_seqno !=
                         fd_resource(view->base.texture)->seqno)) {
               fd6_sampler_view_update(ctx,
                                       fd6_pipe_sampler_view(tex->textures[i]));
            }
         } else {
            static const struct fd6_pipe_sampler_view dummy_view = {};
            view = &dummy_view;
         }

         OUT_RING(state, view->texconst0);
         OUT_RING(state, view->texconst1);
         OUT_RING(state, view->texconst2);
         OUT_RING(state, view->texconst3);

         if (view->ptr1) {
            OUT_RELOC(state, view->ptr1->bo, view->offset1,
                      (uint64_t)view->texconst5 << 32, 0);
         } else {
            OUT_RING(state, 0x00000000);
            OUT_RING(state, view->texconst5);
         }

         OUT_RING(state, view->texconst6);

         if (view->ptr2) {
            OUT_RELOC(state, view->ptr2->bo, view->offset2, 0, 0);
         } else {
            OUT_RING(state, 0);
            OUT_RING(state, 0);
         }

         OUT_RING(state, view->texconst9);
         OUT_RING(state, view->texconst10);
         OUT_RING(state, view->texconst11);
         OUT_RING(state, 0);
         OUT_RING(state, 0);
         OUT_RING(state, 0);
         OUT_RING(state, 0);
      }

      if (v) {
         const struct ir3_ibo_mapping *mapping = &v->image_mapping;
         struct fd_shaderbuf_stateobj *buf = &ctx->shaderbuf[type];
         struct fd_shaderimg_stateobj *img = &ctx->shaderimg[type];

         for (unsigned i = 0; i < mapping->num_tex; i++) {
            unsigned idx = mapping->tex_to_image[i];
            if (idx & IBO_SSBO) {
               fd6_emit_ssbo_tex(state, &buf->sb[idx & ~IBO_SSBO]);
            } else {
               fd6_emit_image_tex(state, &img->si[idx]);
            }
         }

         if (v->fb_read) {
            fd6_emit_fb_tex(state, ctx);
         }
      }

      /* emit texture state: */
      OUT_PKT7(ring, opcode, 3);
      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                        CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE6_0_NUM_UNIT(num_merged_textures));
      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

      OUT_PKT4(ring, tex_const_reg, 2);
      OUT_RB(ring, state); /* SRC_ADDR_LO/HI */

      fd_ringbuffer_del(state);
   }

   OUT_PKT4(ring, tex_count_reg, 1);
   OUT_RING(ring, num_merged_textures);

   return needs_border;
}
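/* (For 3d pipelines fd6_emit_textures() is wrapped by
 * fd6_emit_combined_textures() below; the compute path calls it
 * directly from fd6_emit_cs_state().)
 */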
/* Emits combined texture state, which also includes any Image/SSBO
 * related texture state merged in (because we must have all texture
 * state for a given stage in a single buffer). In the fast-path, if
 * we don't need to merge in any image/ssbo related texture state, we
 * just use cached texture stateobj. Otherwise we generate a single-
 * use stateobj.
 *
 * TODO Is there some sane way we can still use cached texture stateobj
 * with image/ssbo in use?
 *
 * returns whether border_color is required:
 */
static bool
fd6_emit_combined_textures(struct fd_ringbuffer *ring, struct fd6_emit *emit,
                           enum pipe_shader_type type,
                           const struct ir3_shader_variant *v) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   bool needs_border = false;

   static const struct {
      enum fd6_state_id state_id;
      unsigned enable_mask;
   } s[PIPE_SHADER_TYPES] = {
      [PIPE_SHADER_VERTEX] = {FD6_GROUP_VS_TEX, ENABLE_ALL},
      [PIPE_SHADER_TESS_CTRL] = {FD6_GROUP_HS_TEX, ENABLE_ALL},
      [PIPE_SHADER_TESS_EVAL] = {FD6_GROUP_DS_TEX, ENABLE_ALL},
      [PIPE_SHADER_GEOMETRY] = {FD6_GROUP_GS_TEX, ENABLE_ALL},
      [PIPE_SHADER_FRAGMENT] = {FD6_GROUP_FS_TEX, ENABLE_DRAW},
   };

   debug_assert(s[type].state_id);

   if (!v->image_mapping.num_tex && !v->fb_read) {
      /* in the fast-path, when we don't have to mix in any image/SSBO
       * related texture state, we can just lookup the stateobj and
       * re-emit that:
       *
       * Also, framebuffer-read is a slow-path because an extra
       * texture needs to be inserted.
       *
       * TODO we can probably simplify things if we also treated
       * border_color as a slow-path.. this way the tex state key
       * wouldn't depend on bcolor_offset.. but fb_read might rather
       * be *somehow* a fast-path if we eventually used it for PLS.
       * I suppose there would be no harm in just *always* inserting
       * an fb_read texture?
       */
      if ((ctx->dirty_shader[type] & FD_DIRTY_SHADER_TEX) &&
          ctx->tex[type].num_textures > 0) {
         struct fd6_texture_state *tex =
            fd6_texture_state(ctx, type, &ctx->tex[type]);

         needs_border |= tex->needs_border;

         fd6_emit_add_group(emit, tex->stateobj, s[type].state_id,
                            s[type].enable_mask);

         fd6_texture_state_reference(&tex, NULL);
      }
   } else {
      /* In the slow-path, create a one-shot texture state object
       * if either TEX|PROG|SSBO|IMAGE state is dirty:
       */
      if ((ctx->dirty_shader[type] &
           (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE |
            FD_DIRTY_SHADER_SSBO)) ||
          v->fb_read) {
         struct fd_texture_stateobj *tex = &ctx->tex[type];
         struct fd_ringbuffer *stateobj = fd_submit_new_ringbuffer(
            ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
         unsigned bcolor_offset = fd6_border_color_offset(ctx, type, tex);

         needs_border |=
            fd6_emit_textures(ctx, stateobj, type, tex, bcolor_offset, v);

         fd6_emit_take_group(emit, stateobj, s[type].state_id,
                             s[type].enable_mask);
      }
   }

   return needs_border;
}

static struct fd_ringbuffer *
build_vbo_state(struct fd6_emit *emit) assert_dt
{
   const struct fd_vertex_state *vtx = emit->vtx;

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      emit->ctx->batch->submit, 4 * (1 + vtx->vertexbuf.count * 4),
      FD_RINGBUFFER_STREAMING);

   OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * vtx->vertexbuf.count);
   for (int32_t j = 0; j < vtx->vertexbuf.count; j++) {
      const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j];
      struct fd_resource *rsc = fd_resource(vb->buffer.resource);
      if (rsc == NULL) {
         OUT_RING(ring, 0);
         OUT_RING(ring, 0);
         OUT_RING(ring, 0);
         OUT_RING(ring, 0);
      } else {
         uint32_t off = vb->buffer_offset;
         uint32_t size = fd_bo_size(rsc->bo) - off;

         OUT_RELOC(ring, rsc->bo, off, 0, 0);
         OUT_RING(ring, size);       /* VFD_FETCH[j].SIZE */
         OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */
      }
   }

   return ring;
}
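/* Decide early vs late z-test for the draw.  Roughly: early-z whenever
 * the FS cannot affect the depth outcome (no discard/kill, no gl_FragDepth
 * or stencil-ref writes), otherwise late-z, optionally with LRZ testing
 * still in front (EARLY_LRZ_LATE_Z) while the LRZ buffer remains valid.
 */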
static enum a6xx_ztest_mode
compute_ztest_mode(struct fd6_emit *emit, bool lrz_valid) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
   const struct ir3_shader_variant *fs = emit->fs;

   if (fs->shader->nir->info.fs.early_fragment_tests)
      return A6XX_EARLY_Z;

   if (fs->no_earlyz || fs->writes_pos || !zsa->base.depth_enabled ||
       fs->writes_stencilref) {
      return A6XX_LATE_Z;
   } else if ((fs->has_kill || zsa->alpha_test) &&
              (zsa->writes_zs || !pfb->zsbuf)) {
      /* Slightly odd, but seems like the hw wants us to select
       * LATE_Z mode if there is no depth buffer + discard. Either
       * that, or when occlusion query is enabled. See:
       *
       * dEQP-GLES31.functional.fbo.no_attachments.*
       */
      return lrz_valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
   } else {
      return A6XX_EARLY_Z;
   }
}

/**
 * Calculate normalized LRZ state based on zsa/prog/blend state, updating
 * the zsbuf's lrz state as necessary to detect the cases where we need
 * to invalidate lrz.
 */
static struct fd6_lrz_state
compute_lrz_state(struct fd6_emit *emit, bool binning_pass) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   const struct ir3_shader_variant *fs = emit->fs;
   struct fd6_lrz_state lrz;

   if (!pfb->zsbuf) {
      memset(&lrz, 0, sizeof(lrz));
      if (!binning_pass) {
         lrz.z_mode = compute_ztest_mode(emit, false);
      }
      return lrz;
   }

   struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
   struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
   struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

   lrz = zsa->lrz;

   /* normalize lrz state: */
   if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
      lrz.write = false;
      if (binning_pass)
         lrz.enable = false;
   }

   /* if we change depthfunc direction, bail out on using LRZ. The
    * LRZ buffer encodes a min/max depth value per block, but if
    * we switch from GT/GE <-> LT/LE, those values cannot be
    * interpreted properly.
    */
   if (zsa->base.depth_enabled && (rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
       (rsc->lrz_direction != lrz.direction)) {
      rsc->lrz_valid = false;
   }

   if (zsa->invalidate_lrz || !rsc->lrz_valid) {
      rsc->lrz_valid = false;
      memset(&lrz, 0, sizeof(lrz));
   }

   if (fs->no_earlyz || fs->writes_pos) {
      lrz.enable = false;
      lrz.write = false;
      lrz.test = false;
   }

   if (!binning_pass) {
      lrz.z_mode = compute_ztest_mode(emit, rsc->lrz_valid);
   }

   /* Once we start writing to the real depth buffer, we lock in the
    * direction for LRZ.. if we have to skip a LRZ write for any
    * reason, it is still safe to have LRZ until there is a direction
    * reversal. Prior to the reversal, since we disabled LRZ writes
    * in the "unsafe" cases, this just means that the LRZ test may
    * not early-discard some things that end up not passing a later
    * test (ie. be overly conservative). But once you have a reversal
    * of direction, it is possible to increase/decrease the z value
    * to the point where the overly-conservative test is incorrect.
    */
   if (zsa->base.depth_writemask) {
      rsc->lrz_direction = lrz.direction;
   }

   return lrz;
}
static struct fd_ringbuffer *
build_lrz(struct fd6_emit *emit, bool binning_pass) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_lrz_state lrz = compute_lrz_state(emit, binning_pass);

   /* If the LRZ state has not changed, we can skip the emit: */
   if (!ctx->last.dirty &&
       !memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
      return NULL;

   fd6_ctx->last.lrz[binning_pass] = lrz;

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 8 * 4, FD_RINGBUFFER_STREAMING);

   OUT_REG(ring,
           A6XX_GRAS_LRZ_CNTL(.enable = lrz.enable, .lrz_write = lrz.write,
                              .greater = lrz.direction == FD_LRZ_GREATER,
                              .z_test_enable = lrz.test, ));
   OUT_REG(ring, A6XX_RB_LRZ_CNTL(.enable = lrz.enable, ));

   OUT_REG(ring, A6XX_RB_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));

   OUT_REG(ring, A6XX_GRAS_SU_DEPTH_PLANE_CNTL(.z_mode = lrz.z_mode, ));

   return ring;
}

static struct fd_ringbuffer *
build_scissor(struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      emit->ctx->batch->submit, 3 * 4, FD_RINGBUFFER_STREAMING);

   OUT_REG(
      ring,
      A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = scissor->minx, .y = scissor->miny),
      A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
                                     .y = MAX2(scissor->maxy, 1) - 1));

   ctx->batch->max_scissor.minx =
      MIN2(ctx->batch->max_scissor.minx, scissor->minx);
   ctx->batch->max_scissor.miny =
      MIN2(ctx->batch->max_scissor.miny, scissor->miny);
   ctx->batch->max_scissor.maxx =
      MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
   ctx->batch->max_scissor.maxy =
      MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);

   return ring;
}

/* Combination of FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |
 * FD_DIRTY_PROG | FD_DIRTY_DUAL_BLEND
 */
static struct fd_ringbuffer *
build_prog_fb_rast(struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
   const struct ir3_shader_variant *fs = emit->fs;

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 9 * 4, FD_RINGBUFFER_STREAMING);

   unsigned nr = pfb->nr_cbufs;

   if (ctx->rasterizer->rasterizer_discard)
      nr = 0;

   struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);

   if (blend->use_dual_src_blend)
      nr++;

   OUT_PKT4(ring, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
   OUT_RING(ring, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
                     COND(fs->writes_smask && pfb->samples > 1,
                          A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
                     COND(fs->writes_stencilref,
                          A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
                     COND(blend->use_dual_src_blend,
                          A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
   OUT_RING(ring, A6XX_RB_FS_OUTPUT_CNTL1_MRT(nr));

   OUT_PKT4(ring, REG_A6XX_SP_FS_OUTPUT_CNTL1, 1);
   OUT_RING(ring, A6XX_SP_FS_OUTPUT_CNTL1_MRT(nr));
   unsigned mrt_components = 0;
   for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;
      mrt_components |= 0xf << (i * 4);
   }

   /* dual source blending has an extra fs output in the 2nd slot */
   if (blend->use_dual_src_blend)
      mrt_components |= 0xf << 4;

   mrt_components &= prog->mrt_components;

   OUT_REG(ring, A6XX_SP_FS_RENDER_COMPONENTS(.dword = mrt_components));
   OUT_REG(ring, A6XX_RB_RENDER_COMPONENTS(.dword = mrt_components));

   return ring;
}

static struct fd_ringbuffer *
build_blend_color(struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_blend_color *bcolor = &ctx->blend_color;
   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);

   OUT_REG(ring, A6XX_RB_BLEND_RED_F32(bcolor->color[0]),
           A6XX_RB_BLEND_GREEN_F32(bcolor->color[1]),
           A6XX_RB_BLEND_BLUE_F32(bcolor->color[2]),
           A6XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));

   return ring;
}

static struct fd_ringbuffer *
build_ibo(struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;

   if (emit->hs) {
      debug_assert(ir3_shader_nibo(emit->hs) == 0);
      debug_assert(ir3_shader_nibo(emit->ds) == 0);
   }
   if (emit->gs) {
      debug_assert(ir3_shader_nibo(emit->gs) == 0);
   }

   struct fd_ringbuffer *ibo_state =
      fd6_build_ibo_state(ctx, emit->fs, PIPE_SHADER_FRAGMENT);
   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 0x100, FD_RINGBUFFER_STREAMING);

   OUT_PKT7(ring, CP_LOAD_STATE6, 3);
   OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
                     CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                     CP_LOAD_STATE6_0_STATE_BLOCK(SB6_IBO) |
                     CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(emit->fs)));
   OUT_RB(ring, ibo_state);

   OUT_PKT4(ring, REG_A6XX_SP_IBO, 2);
   OUT_RB(ring, ibo_state);

   /* TODO if we used CP_SET_DRAW_STATE for compute shaders, we could
    * de-duplicate this from program->config_stateobj
    */
   OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1);
   OUT_RING(ring, ir3_shader_nibo(emit->fs));

   fd_ringbuffer_del(ibo_state);

   return ring;
}

static void
fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
   struct ir3_stream_output_info *info = prog->stream_output;
   struct fd_streamout_stateobj *so = &ctx->streamout;

   emit->streamout_mask = 0;

   if (!info)
      return;

   for (unsigned i = 0; i < so->num_targets; i++) {
      struct fd_stream_output_target *target =
         fd_stream_output_target(so->targets[i]);

      if (!target)
         continue;

      target->stride = info->stride[i];

      OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE(i), 3);
      /* VPC_SO[i].BUFFER_BASE_LO: */
      OUT_RELOC(ring, fd_resource(target->base.buffer)->bo, 0, 0, 0);
      OUT_RING(ring, target->base.buffer_size + target->base.buffer_offset);

      struct fd_bo *offset_bo = fd_resource(target->offset_buf)->bo;

      if (so->reset & (1 << i)) {
         assert(so->offsets[i] == 0);

         OUT_PKT7(ring, CP_MEM_WRITE, 3);
         OUT_RELOC(ring, offset_bo, 0, 0, 0);
         OUT_RING(ring, target->base.buffer_offset);

         OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
         OUT_RING(ring, target->base.buffer_offset);
      } else {
         OUT_PKT7(ring, CP_MEM_TO_REG, 3);
         OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
                           CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
                           CP_MEM_TO_REG_0_CNT(0));
         OUT_RELOC(ring, offset_bo, 0, 0, 0);
      }
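      /* Two paths above: on a reset (new stream-output target) the
       * offset is known on the CPU and written directly; otherwise it
       * is restored GPU-side from offset_bo via CP_MEM_TO_REG, since
       * the CPU does not know how far previous draws advanced it.
       */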
      /* After a draw the HW writes the new offset to offset_bo: */
      OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE(i), 2);
      OUT_RELOC(ring, offset_bo, 0, 0, 0);

      so->reset &= ~(1 << i);

      emit->streamout_mask |= (1 << i);
   }

   if (emit->streamout_mask) {
      fd6_emit_add_group(emit, prog->streamout_stateobj, FD6_GROUP_SO,
                         ENABLE_ALL);
   } else {
      /* If we transition from a draw with streamout to one without, turn
       * off streamout.
       */
      if (ctx->last.streamout_mask != 0) {
         struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
            emit->ctx->batch->submit, 5 * 4, FD_RINGBUFFER_STREAMING);

         OUT_PKT7(obj, CP_CONTEXT_REG_BUNCH, 4);
         OUT_RING(obj, REG_A6XX_VPC_SO_CNTL);
         OUT_RING(obj, 0);
         OUT_RING(obj, REG_A6XX_VPC_SO_STREAM_CNTL);
         OUT_RING(obj, 0);

         fd6_emit_take_group(emit, obj, FD6_GROUP_SO, ENABLE_ALL);
      }
   }

   ctx->last.streamout_mask = emit->streamout_mask;
}

/**
 * Stuff that less frequently changes and isn't (yet) moved into stategroups
 */
static void
fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt
{
   struct fd_context *ctx = emit->ctx;
   const enum fd_dirty_3d_state dirty = emit->dirty;

   if (dirty & FD_DIRTY_STENCIL_REF) {
      struct pipe_stencil_ref *sr = &ctx->stencil_ref;

      OUT_PKT4(ring, REG_A6XX_RB_STENCILREF, 1);
      OUT_RING(ring, A6XX_RB_STENCILREF_REF(sr->ref_value[0]) |
                        A6XX_RB_STENCILREF_BFREF(sr->ref_value[1]));
   }

   if (dirty & FD_DIRTY_VIEWPORT) {
      struct pipe_scissor_state *scissor = &ctx->viewport_scissor;

      OUT_REG(ring, A6XX_GRAS_CL_VPORT_XOFFSET(0, ctx->viewport.translate[0]),
              A6XX_GRAS_CL_VPORT_XSCALE(0, ctx->viewport.scale[0]),
              A6XX_GRAS_CL_VPORT_YOFFSET(0, ctx->viewport.translate[1]),
              A6XX_GRAS_CL_VPORT_YSCALE(0, ctx->viewport.scale[1]),
              A6XX_GRAS_CL_VPORT_ZOFFSET(0, ctx->viewport.translate[2]),
              A6XX_GRAS_CL_VPORT_ZSCALE(0, ctx->viewport.scale[2]));

      OUT_REG(
         ring,
         A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = scissor->minx,
                                          .y = scissor->miny),
         A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = MAX2(scissor->maxx, 1) - 1,
                                          .y = MAX2(scissor->maxy, 1) - 1));

      unsigned guardband_x = fd_calc_guardband(ctx->viewport.translate[0],
                                               ctx->viewport.scale[0], false);
      unsigned guardband_y = fd_calc_guardband(ctx->viewport.translate[1],
                                               ctx->viewport.scale[1], false);

      OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = guardband_x,
                                                    .vert = guardband_y));
   }

   /* The clamp ranges are only used when the rasterizer wants depth
    * clamping.
    */
   if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER)) &&
       fd_depth_clamp_enabled(ctx)) {
      float zmin, zmax;
      util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
                              &zmin, &zmax);

      OUT_REG(ring, A6XX_GRAS_CL_Z_CLAMP_MIN(0, zmin),
              A6XX_GRAS_CL_Z_CLAMP_MAX(0, zmax));

      OUT_REG(ring, A6XX_RB_Z_CLAMP_MIN(zmin), A6XX_RB_Z_CLAMP_MAX(zmax));
   }
}
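/* Main draw-time state emit: each dirty state group below either
 * re-references a cached stateobj or builds a streaming one, and the
 * accumulated groups are patched into the cmdstream at the end via
 * CP_SET_DRAW_STATE, so the binning and draw passes can enable
 * different subsets.
 */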
void
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
   struct fd_context *ctx = emit->ctx;
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
   const struct ir3_shader_variant *vs = emit->vs;
   const struct ir3_shader_variant *hs = emit->hs;
   const struct ir3_shader_variant *ds = emit->ds;
   const struct ir3_shader_variant *gs = emit->gs;
   const struct ir3_shader_variant *fs = emit->fs;
   bool needs_border = false;

   emit_marker6(ring, 5);

   /* NOTE: we track fb_read differently than _BLEND_ENABLED since we
    * might decide to do sysmem in some cases when blend is enabled:
    */
   if (fs->fb_read)
      ctx->batch->gmem_reason |= FD_GMEM_FB_READ;

   u_foreach_bit (b, emit->dirty_groups) {
      enum fd6_state_id group = b;
      struct fd_ringbuffer *state = NULL;
      uint32_t enable_mask = ENABLE_ALL;

      switch (group) {
      case FD6_GROUP_VTXSTATE:
         state = fd6_vertex_stateobj(ctx->vtx.vtx)->stateobj;
         fd_ringbuffer_ref(state);
         break;
      case FD6_GROUP_VBO:
         state = build_vbo_state(emit);
         break;
      case FD6_GROUP_ZSA:
         state = fd6_zsa_state(
            ctx,
            util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])),
            fd_depth_clamp_enabled(ctx));
         fd_ringbuffer_ref(state);
         break;
      case FD6_GROUP_LRZ:
         state = build_lrz(emit, false);
         if (!state)
            continue;
         enable_mask = ENABLE_DRAW;
         break;
      case FD6_GROUP_LRZ_BINNING:
         state = build_lrz(emit, true);
         if (!state)
            continue;
         enable_mask = CP_SET_DRAW_STATE__0_BINNING;
         break;
      case FD6_GROUP_SCISSOR:
         state = build_scissor(emit);
         break;
      case FD6_GROUP_PROG:
         fd6_emit_add_group(emit, prog->config_stateobj, FD6_GROUP_PROG_CONFIG,
                            ENABLE_ALL);
         fd6_emit_add_group(emit, prog->stateobj, FD6_GROUP_PROG, ENABLE_DRAW);
         fd6_emit_add_group(emit, prog->binning_stateobj,
                            FD6_GROUP_PROG_BINNING,
                            CP_SET_DRAW_STATE__0_BINNING);

         /* emit remaining streaming program state, ie. what depends on
          * other emit state, so cannot be pre-baked.
          */
         fd6_emit_take_group(emit, fd6_program_interp_state(emit),
                             FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
         continue;
      case FD6_GROUP_RASTERIZER:
         state = fd6_rasterizer_state(ctx, emit->primitive_restart);
         fd_ringbuffer_ref(state);
         break;
      case FD6_GROUP_PROG_FB_RAST:
         state = build_prog_fb_rast(emit);
         break;
      case FD6_GROUP_BLEND:
         state = fd6_blend_variant(ctx->blend, pfb->samples, ctx->sample_mask)
                    ->stateobj;
         fd_ringbuffer_ref(state);
         break;
      case FD6_GROUP_BLEND_COLOR:
         state = build_blend_color(emit);
         break;
      case FD6_GROUP_IBO:
         state = build_ibo(emit);
         fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
         break;
      case FD6_GROUP_CONST:
         state = fd6_build_user_consts(emit);
         break;
      case FD6_GROUP_VS_DRIVER_PARAMS:
         state = fd6_build_vs_driver_params(emit);
         break;
      case FD6_GROUP_PRIMITIVE_PARAMS:
         state = fd6_build_tess_consts(emit);
         break;
      case FD6_GROUP_VS_TEX:
         needs_border |=
            fd6_emit_combined_textures(ring, emit, PIPE_SHADER_VERTEX, vs);
         continue;
      case FD6_GROUP_HS_TEX:
         if (hs) {
            needs_border |= fd6_emit_combined_textures(
               ring, emit, PIPE_SHADER_TESS_CTRL, hs);
         }
         continue;
      case FD6_GROUP_DS_TEX:
         if (ds) {
            needs_border |= fd6_emit_combined_textures(
               ring, emit, PIPE_SHADER_TESS_EVAL, ds);
         }
         continue;
      case FD6_GROUP_GS_TEX:
         if (gs) {
            needs_border |=
               fd6_emit_combined_textures(ring, emit, PIPE_SHADER_GEOMETRY, gs);
         }
         continue;
      case FD6_GROUP_FS_TEX:
         needs_border |=
            fd6_emit_combined_textures(ring, emit, PIPE_SHADER_FRAGMENT, fs);
         continue;
      case FD6_GROUP_SO:
         fd6_emit_streamout(ring, emit);
         continue;
      case FD6_GROUP_NON_GROUP:
         fd6_emit_non_ring(ring, emit);
         continue;
      default:
         unreachable("bad state group");
      }

      fd6_emit_take_group(emit, state, group, enable_mask);
   }

   if (needs_border)
      emit_border_color(ctx, ring);
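   /* Flush the accumulated groups: each one is three dwords of
    * CP_SET_DRAW_STATE (header plus 64b stateobj address); a group with
    * no stateobj is emitted as a zero-count disable instead.
    */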
   if (emit->num_groups > 0) {
      OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * emit->num_groups);
      for (unsigned i = 0; i < emit->num_groups; i++) {
         struct fd6_state_group *g = &emit->groups[i];
         unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;

         debug_assert((g->enable_mask & ~ENABLE_ALL) == 0);

         if (n == 0) {
            OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                              CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
                              CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
            OUT_RING(ring, 0x00000000);
            OUT_RING(ring, 0x00000000);
         } else {
            OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
                              CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
            OUT_RB(ring, g->stateobj);
         }

         if (g->stateobj)
            fd_ringbuffer_del(g->stateobj);
      }
      emit->num_groups = 0;
   }
}

void
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                  struct ir3_shader_variant *cp)
{
   enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE];

   if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG |
                FD_DIRTY_SHADER_IMAGE | FD_DIRTY_SHADER_SSBO)) {
      struct fd_texture_stateobj *tex = &ctx->tex[PIPE_SHADER_COMPUTE];
      unsigned bcolor_offset =
         fd6_border_color_offset(ctx, PIPE_SHADER_COMPUTE, tex);

      bool needs_border = fd6_emit_textures(ctx, ring, PIPE_SHADER_COMPUTE, tex,
                                            bcolor_offset, cp);

      if (needs_border)
         emit_border_color(ctx, ring);

      OUT_PKT4(ring, REG_A6XX_SP_VS_TEX_COUNT, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_SP_HS_TEX_COUNT, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_SP_DS_TEX_COUNT, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_SP_GS_TEX_COUNT, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_SP_FS_TEX_COUNT, 1);
      OUT_RING(ring, 0);
   }

   if (dirty & (FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)) {
      struct fd_ringbuffer *state =
         fd6_build_ibo_state(ctx, cp, PIPE_SHADER_COMPUTE);

      OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
      OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                        CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
                        CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                        CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
                        CP_LOAD_STATE6_0_NUM_UNIT(ir3_shader_nibo(cp)));
      OUT_RB(ring, state);

      OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2);
      OUT_RB(ring, state);

      OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
      OUT_RING(ring, ir3_shader_nibo(cp));

      fd_ringbuffer_del(state);
   }
}

/* emit setup at the beginning of a new cmdstream buffer (don't rely on
 * previous state, there could have been a context switch between ioctls):
 */
void
fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   // struct fd_context *ctx = batch->ctx;

   if (!batch->nondraw) {
      trace_start_state_restore(&batch->trace);
   }

   fd6_cache_inv(batch, ring);

   OUT_REG(ring,
           A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
                                    .ds_state = true, .gs_state = true,
                                    .fs_state = true, .cs_state = true,
                                    .gfx_ibo = true, .cs_ibo = true,
                                    .gfx_shared_const = true,
                                    .cs_shared_const = true,
                                    .gfx_bindless = 0x1f, .cs_bindless = 0x1f));

   OUT_WFI5(ring);
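   /* The long run of WRITE()s below resets "static" context registers to
    * known-good values; many are still unidentified (UNKNOWN_*) and the
    * values appear to largely mirror what the blob driver programs.
    */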
   WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x0);
   WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF);
   WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
   WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
   WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
   WRITE(REG_A6XX_TPL1_UNKNOWN_B600, 0x100000);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE01, 0);

   WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0);
   WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880);
   WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000);
   WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430);
   WRITE(REG_A6XX_SP_IBO_COUNT, 0);
   WRITE(REG_A6XX_SP_UNKNOWN_B182, 0);
   WRITE(REG_A6XX_HLSQ_SHARED_CONSTS, 0);
   WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000);
   WRITE(REG_A6XX_UCHE_CLIENT_PF, 4);
   WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1);
   WRITE(REG_A6XX_SP_MODE_CONTROL,
         A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4);
   WRITE(REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
   WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010);
   WRITE(REG_A6XX_PC_MODE_CNTL, 0x1f);

   WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0);
   WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0);
   WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2);

   WRITE(REG_A6XX_RB_UNKNOWN_8818, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_8819, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881A, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881B, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881C, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881D, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_881E, 0);
   WRITE(REG_A6XX_RB_UNKNOWN_88F0, 0);

   WRITE(REG_A6XX_VPC_POINT_COORD_INVERT, A6XX_VPC_POINT_COORD_INVERT(0).value);
   WRITE(REG_A6XX_VPC_UNKNOWN_9300, 0);

   WRITE(REG_A6XX_VPC_SO_DISABLE, A6XX_VPC_SO_DISABLE(true).value);

   WRITE(REG_A6XX_PC_RASTER_CNTL, 0);

   WRITE(REG_A6XX_PC_MULTIVIEW_CNTL, 0);

   WRITE(REG_A6XX_SP_UNKNOWN_B183, 0);

   WRITE(REG_A6XX_GRAS_UNKNOWN_8099, 0);
   WRITE(REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
   WRITE(REG_A6XX_GRAS_UNKNOWN_80A0, 2);
   WRITE(REG_A6XX_GRAS_UNKNOWN_80AF, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9210, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9211, 0);
   WRITE(REG_A6XX_VPC_UNKNOWN_9602, 0);
   WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
   WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
   /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309
    * but this seems to kill texture gather offsets.
    */
   WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2);
   WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
   WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
   WRITE(REG_A6XX_RB_Z_BOUNDS_MIN, 0);
   WRITE(REG_A6XX_RB_Z_BOUNDS_MAX, 0);
   WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);

   emit_marker6(ring, 7);

   OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */

   WRITE(REG_A6XX_VFD_MULTIVIEW_CNTL, 0);

   OUT_PKT4(ring, REG_A6XX_PC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */

   /* Clear any potential pending state groups to be safe: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* VPC_SO_STREAM_CNTL */

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, 0x00000000);

   OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
   OUT_RING(ring, 0x00000000);

   if (!batch->nondraw) {
      trace_end_state_restore(&batch->trace);
   }
}
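/* Simple dword-at-a-time GPU memcpy via CP_MEM_TO_MEM, exposed through
 * the screen->mem_to_mem hook set up in fd6_emit_init_screen() below.
 */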
static void
fd6_mem_to_mem(struct fd_ringbuffer *ring, struct pipe_resource *dst,
               unsigned dst_off, struct pipe_resource *src, unsigned src_off,
               unsigned sizedwords)
{
   struct fd_bo *src_bo = fd_resource(src)->bo;
   struct fd_bo *dst_bo = fd_resource(dst)->bo;
   unsigned i;

   for (i = 0; i < sizedwords; i++) {
      OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
      OUT_RING(ring, 0x00000000);
      OUT_RELOC(ring, dst_bo, dst_off, 0, 0);
      OUT_RELOC(ring, src_bo, src_off, 0, 0);

      dst_off += 4;
      src_off += 4;
   }
}

/* this is *almost* the same as fd6_cache_flush().. which I guess
 * could be re-worked to be something a bit more generic w/ param
 * indicating what needs to be flushed.. although that would mean
 * figuring out which events trigger what state to flush..
 */
static void
fd6_framebuffer_barrier(struct fd_context *ctx) assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_batch *batch = fd_context_batch_locked(ctx);
   struct fd_ringbuffer *ring = batch->draw;
   unsigned seqno;

   fd_batch_needs_flush(batch);

   seqno = fd6_event_write(batch, ring, RB_DONE_TS, true);

   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
                     CP_WAIT_REG_MEM_0_POLL_MEMORY);
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
   OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);

   seqno = fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);

   fd6_event_write(batch, ring, 0x31, false);

   OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
   OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));

   fd_batch_unlock_submit(batch);
   fd_batch_reference(&batch, NULL);
}

void
fd6_emit_init_screen(struct pipe_screen *pscreen)
{
   struct fd_screen *screen = fd_screen(pscreen);
   screen->emit_ib = fd6_emit_ib;
   screen->mem_to_mem = fd6_mem_to_mem;
}

void
fd6_emit_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);
   ctx->framebuffer_barrier = fd6_framebuffer_barrier;
}