/* Path: blob/21.2-virgl/src/gallium/drivers/etnaviv/etnaviv_emit.c (4570 views) */
/*1* Copyright (c) 2014-2015 Etnaviv Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22* Authors:23* Wladimir J. 
van der Laan <[email protected]>24*/2526#include "etnaviv_emit.h"2728#include "etnaviv_blend.h"29#include "etnaviv_compiler.h"30#include "etnaviv_context.h"31#include "etnaviv_rasterizer.h"32#include "etnaviv_resource.h"33#include "etnaviv_rs.h"34#include "etnaviv_screen.h"35#include "etnaviv_shader.h"36#include "etnaviv_texture.h"37#include "etnaviv_translate.h"38#include "etnaviv_uniforms.h"39#include "etnaviv_util.h"40#include "etnaviv_zsa.h"41#include "hw/common.xml.h"42#include "hw/state.xml.h"43#include "hw/state_blt.xml.h"44#include "util/u_math.h"4546/* Queue a STALL command (queues 2 words) */47static inline void48CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)49{50etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);51etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));52}5354void55etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)56{57bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);58etna_cmd_stream_reserve(stream, blt ? 
8 : 4);5960if (blt) {61etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);62etna_cmd_stream_emit(stream, 1);63}6465/* TODO: set bit 28/29 of token after BLT COPY_BUFFER */66etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);67etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));6869if (from == SYNC_RECIPIENT_FE) {70/* if the frontend is to be stalled, queue a STALL frontend command */71CMD_STALL(stream, from, to);72} else {73/* otherwise, load the STALL token state */74etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);75etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));76}7778if (blt) {79etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);80etna_cmd_stream_emit(stream, 0);81}82}8384#define EMIT_STATE(state_name, src_value) \85etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)8687#define EMIT_STATE_FIXP(state_name, src_value) \88etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)8990#define EMIT_STATE_RELOC(state_name, src_value) \91etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)9293#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */9495static unsigned96required_stream_size(struct etna_context *ctx)97{98unsigned size = ETNA_3D_CONTEXT_SIZE;99100/* stall + flush */101size += 2 + 4;102103/* vertex elements */104size += ctx->vertex_elements->num_elements + 1;105106/* uniforms - worst case (2 words per uniform load) */107size += ctx->shader.vs->uniforms.count * 2;108size += ctx->shader.fs->uniforms.count * 2;109110/* shader */111size += ctx->shader_state.vs_inst_mem_size + 1;112size += ctx->shader_state.ps_inst_mem_size + 1;113114/* DRAW_INDEXED_PRIMITIVES command */115size += 6;116117/* reserve for alignment etc. 
*/118size += 64;119120return size;121}122123/* Emit state that only exists on HALTI5+ */124static void125emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)126{127struct etna_cmd_stream *stream = ctx->stream;128uint32_t dirty = ctx->dirty;129struct etna_coalesce coalesce;130131etna_coalesce_start(stream, &coalesce);132if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {133/* Magic states (load balancing, inter-unit sync, buffers) */134/*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);135/*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));136/*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));137for (int x = 0; x < 4; ++x) {138/*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);139}140}141if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {142for (int x = 0; x < 4; ++x) {143/*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);144}145}146if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {147/*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);148/*00A94*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);149/*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);150/*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);151/*01084*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);152/*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);153}154etna_coalesce_end(stream, &coalesce);155}156157/* Emit state that no longer exists on HALTI5 */158static void159emit_pre_halti5_state(struct etna_context *ctx)160{161struct etna_cmd_stream *stream = ctx->stream;162uint32_t dirty = ctx->dirty;163struct etna_coalesce coalesce;164165etna_coalesce_start(stream, &coalesce);166if (unlikely(dirty & 
(ETNA_DIRTY_SHADER))) {167/*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);168}169if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {170for (int x = 0; x < 4; ++x) {171/*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);172}173}174if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {175for (int x = 0; x < 4; ++x) {176/*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);177}178}179if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {180/*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);181}182if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {183for (int x = 0; x < 10; ++x) {184/*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);185}186}187if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {188/*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);189for (int x = 0; x < 4; ++x) {190/*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);191}192for (int x = 0; x < 16; ++x) {193/*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);194}195}196if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {197/*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);198}199if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {200/*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);201}202if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {203/*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);204for (int x = 0; x < 2; ++x) {205/*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);206}207/*03834*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS2, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);208}209etna_coalesce_end(stream, &coalesce);210}211212/* Weave state before draw operation. This function merges all the compiled213* state blocks under the context into one device register state. 
Parts of214* this state that are changed since last call (dirty) will be uploaded as215* state changes in the command buffer. */216void217etna_emit_state(struct etna_context *ctx)218{219struct etna_cmd_stream *stream = ctx->stream;220struct etna_screen *screen = ctx->screen;221unsigned ccw = ctx->rasterizer->front_ccw;222223224/* Pre-reserve the command buffer space which we are likely to need.225* This must cover all the state emitted below, and the following226* draw command. */227etna_cmd_stream_reserve(stream, required_stream_size(ctx));228229uint32_t dirty = ctx->dirty;230231/* Pre-processing: see what caches we need to flush before making state changes. */232uint32_t to_flush = 0;233if (unlikely(dirty & (ETNA_DIRTY_BLEND)))234to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;235if (unlikely(dirty & ETNA_DIRTY_ZSA))236to_flush |= VIVS_GL_FLUSH_CACHE_DEPTH;237if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES)))238to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;239if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */240to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;241if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))242to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;243244if (to_flush) {245etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);246etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);247}248249/* Flush TS cache before changing TS configuration. */250if (unlikely(dirty & ETNA_DIRTY_TS)) {251etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);252}253254/* Update vertex elements. 
This is different from any of the other states, in that255* a) the number of vertex elements written matters: so write only active ones256* b) the vertex element states must all be written: do not skip entries that stay the same */257if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {258if (screen->specs.halti >= 5) {259/*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),260ctx->vertex_elements->num_elements,261ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);262/*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),263ctx->vertex_elements->num_elements,264ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);265/*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),266ctx->vertex_elements->num_elements,267ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);268} else {269/* Special case: vertex elements must always be sent in full if changed */270/*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),271ctx->vertex_elements->num_elements,272ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);273if (screen->specs.halti >= 2) {274/*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),275ctx->vertex_elements->num_elements,276ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);277}278}279}280unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex281? 
ctx->shader_state.VS_OUTPUT_COUNT_PSIZE282: ctx->shader_state.VS_OUTPUT_COUNT;283284/* The following code is originally generated by gen_merge_state.py, to285* emit state in increasing order of address (this makes it possible to merge286* consecutive register updates into one SET_STATE command)287*288* There have been some manual changes, where the weaving operation is not289* simply bitwise or:290* - scissor fixp291* - num vertex elements292* - scissor handling293* - num samplers294* - texture lod295* - ETNA_DIRTY_TS296* - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not297* change anyway298* - PS / framebuffer interaction for MSAA299* - move update of GL_MULTI_SAMPLE_CONFIG first300* - add unlikely()/likely()301*/302struct etna_coalesce coalesce;303304etna_coalesce_start(stream, &coalesce);305306/* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here307* directly308* or indirectly */309/* multi sample config is set first, and outside of the normal sorting310* order, as changing the multisample state clobbers PS.INPUT_COUNT (and311* possibly PS.TEMP_REGISTER_CONTROL).312*/313if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {314uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);315val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;316317/*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);318}319if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {320/*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);321/*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);322}323if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {324/*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);325}326if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {327if (screen->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */328for (int x = 0; x < ctx->vertex_buffer.count; ++x) {329/*14600*/ 
EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);330}331for (int x = 0; x < ctx->vertex_buffer.count; ++x) {332if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {333/*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);334}335}336} else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */337for (int x = 0; x < ctx->vertex_buffer.count; ++x) {338/*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);339}340for (int x = 0; x < ctx->vertex_buffer.count; ++x) {341if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {342/*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);343}344}345} else { /* hw w/ single vertex stream */346/*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);347/*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);348}349}350/* gallium has instance divisor as part of elements state */351if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && screen->specs.halti >= 2) {352for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {353/*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);354}355}356357if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {358359/*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);360}361if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {362/*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);363/*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);364}365if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {366/*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);367}368if (unlikely(dirty & 
(ETNA_DIRTY_VIEWPORT))) {369/*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);370/*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);371/*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);372/*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);373/*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);374/*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);375}376if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {377struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);378379/*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);380/*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);381/*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);382}383if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {384/*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);385}386if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {387uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;388/*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);389}390if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {391struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);392/*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);393/*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);394}395if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {396/*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, ctx->clipping.minx << 16);397/*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, ctx->clipping.miny << 16);398/*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT);399/*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM);400}401if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {402struct 
etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);403404/*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);405/*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);406/*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);407}408if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {409/*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT);410/*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM);411}412if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {413/*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);414}415if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {416/*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, etna_zsa_state(ctx->zsa)->RA_DEPTH_CONFIG);417}418if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {419/*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);420/*01008*/ EMIT_STATE(PS_INPUT_COUNT,421ctx->framebuffer.msaa_mode422? ctx->shader_state.PS_INPUT_COUNT_MSAA423: ctx->shader_state.PS_INPUT_COUNT);424/*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,425ctx->framebuffer.msaa_mode426? 
ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA427: ctx->shader_state.PS_TEMP_REGISTER_CONTROL);428/*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);429/*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);430}431if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SHADER))) {432/*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |433ctx->framebuffer.PE_DEPTH_CONFIG));434}435if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {436/*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);437/*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);438}439if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {440/*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);441442if (screen->specs.pixel_pipes == 1) {443/*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);444}445446/*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);447}448449if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {450uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];451/*01418*/ EMIT_STATE(PE_STENCIL_OP, val);452}453if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {454uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];455/*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);456}457if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {458uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;459/*01420*/ EMIT_STATE(PE_ALPHA_OP, val);460}461if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {462/*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);463}464if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {465uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;466/*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);467}468if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {469uint32_t val;470/* Use the components and overwrite bits in 
framebuffer.PE_COLOR_FORMAT471* as a mask to enable the bits from blend PE_COLOR_FORMAT */472val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |473VIVS_PE_COLOR_FORMAT_OVERWRITE);474val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;475val &= ctx->framebuffer.PE_COLOR_FORMAT;476/*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);477}478if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {479if (screen->specs.pixel_pipes == 1) {480/*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);481/*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);482/*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);483} else if (screen->specs.pixel_pipes == 2) {484/*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);485/*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);486/*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);487/*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);488/*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);489/*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);490} else {491abort();492}493}494if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_ZSA))) {495uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT;496/*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, val | ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw]);497}498if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {499struct etna_blend_state *blend = etna_blend_state(ctx->blend);500/*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);501}502if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {503struct etna_blend_state *blend = etna_blend_state(ctx->blend);504for (int x = 0; x < 2; ++x) {505/*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);506}507}508if (unlikely(dirty & 
(ETNA_DIRTY_BLEND_COLOR)) &&509VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT)) {510/*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);511/*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);512}513if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {514/*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);515}516if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && screen->specs.halti >= 3)517/*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);518if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {519/*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);520/*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);521/*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);522/*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);523/*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);524/*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);525/*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);526/*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);527}528if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {529/*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);530}531etna_coalesce_end(stream, &coalesce);532/* end only EMIT_STATE */533534/* Emit strongly architecture-specific state */535if (screen->specs.halti >= 5)536emit_halti5_only_state(ctx, vs_output_count);537else538emit_pre_halti5_state(ctx);539540/* Beginning from Halti0 some of the new shader and sampler states are not541* self-synchronizing anymore. 
Thus we need to stall the FE on PE completion542* before loading the new states to avoid corrupting the state of the543* in-flight draw.544*/545if (screen->specs.halti >= 0 &&546(ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF |547ETNA_DIRTY_SAMPLERS | ETNA_DIRTY_SAMPLER_VIEWS)))548etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);549550ctx->emit_texture_state(ctx);551552/* We need to update the uniform cache only if one of the following bits are553* set in ctx->dirty:554* - ETNA_DIRTY_SHADER555* - ETNA_DIRTY_CONSTBUF556* - uniforms_dirty_bits557*558* In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In559* all560* other cases we can load on the changed uniforms.561*/562static const uint32_t uniform_dirty_bits =563ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;564565/**** Large dynamically-sized state ****/566bool do_uniform_flush = screen->specs.halti < 5;567if (dirty & (ETNA_DIRTY_SHADER)) {568/* Special case: a new shader was loaded; simply re-load all uniforms and569* shader code at once */570/* This sequence is special, do not change ordering unless necessary. According to comment571snippets in the Vivante kernel driver a process called "steering" goes on while programming572shader state. This (as I understand it) means certain unified states are "steered"573toward a specific shader unit (VS/PS/...) based on either explicit flags in register57400860, or what other state is written before "auto-steering". 
So this means some575state can legitimately be programmed multiple times.576*/577578if (screen->specs.halti >= 5) { /* ICACHE (HALTI5) */579assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);580/* Set icache (VS) */581etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);582etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);583assert(ctx->shader_state.VS_INST_ADDR.bo);584etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);585etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);586etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);587etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);588589/* Set icache (PS) */590etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);591etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);592assert(ctx->shader_state.PS_INST_ADDR.bo);593etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);594etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);595etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);596etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);597598} else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {599/* ICACHE (pre-HALTI5) */600assert(screen->specs.has_icache && screen->specs.has_shader_range_registers);601/* Set icache (VS) */602etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);603etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,604VIVS_VS_ICACHE_CONTROL_ENABLE |605VIVS_VS_ICACHE_CONTROL_FLUSH_VS);606assert(ctx->shader_state.VS_INST_ADDR.bo);607etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);608609/* Set icache (PS) */610etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);611etna_set_state(stream, 
VIVS_VS_ICACHE_CONTROL,612VIVS_VS_ICACHE_CONTROL_ENABLE |613VIVS_VS_ICACHE_CONTROL_FLUSH_PS);614assert(ctx->shader_state.PS_INST_ADDR.bo);615etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);616} else {617/* Upload shader directly, first flushing and disabling icache if618* supported on this hw */619if (screen->specs.has_icache) {620etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,621VIVS_VS_ICACHE_CONTROL_FLUSH_PS |622VIVS_VS_ICACHE_CONTROL_FLUSH_VS);623}624if (screen->specs.has_shader_range_registers) {625etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);626etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |6270x100);628}629etna_set_state_multi(stream, screen->specs.vs_offset,630ctx->shader_state.vs_inst_mem_size,631ctx->shader_state.VS_INST_MEM);632etna_set_state_multi(stream, screen->specs.ps_offset,633ctx->shader_state.ps_inst_mem_size,634ctx->shader_state.PS_INST_MEM);635}636637if (screen->specs.has_unified_uniforms) {638etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);639etna_set_state(stream, VIVS_PS_UNIFORM_BASE, screen->specs.max_vs_uniforms);640}641642if (do_uniform_flush)643etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);644645etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);646647if (do_uniform_flush)648etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);649650etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);651652if (screen->specs.halti >= 5) {653/* HALTI5 needs to be prompted to pre-fetch shaders */654etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);655etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);656etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);657}658} else {659/* ideally this cache would only be flushed if there are VS uniform changes */660if 
(do_uniform_flush)661etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);662663if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))664etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);665666/* ideally this cache would only be flushed if there are PS uniform changes */667if (do_uniform_flush)668etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);669670if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))671etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);672}673/**** End of state update ****/674#undef EMIT_STATE675#undef EMIT_STATE_FIXP676#undef EMIT_STATE_RELOC677ctx->dirty = 0;678ctx->dirty_sampler_views = 0;679}680681682