Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_state_emit.c
4570 views
/**************************************************************************1*2* Copyright 2003 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "i915_batch.h"28#include "i915_context.h"29#include "i915_debug.h"30#include "i915_fpc.h"31#include "i915_reg.h"32#include "i915_resource.h"3334#include "pipe/p_context.h"35#include "pipe/p_defines.h"36#include "pipe/p_format.h"3738#include "util/format/u_format.h"39#include "util/u_math.h"40#include "util/u_memory.h"4142struct i915_tracked_hw_state {43const char *name;44void (*validate)(struct i915_context *, unsigned *batch_space);45void (*emit)(struct i915_context *);46unsigned dirty, batch_space;47};4849static void50validate_flush(struct i915_context *i915, unsigned *batch_space)51{52*batch_space = i915->flush_dirty ? 1 : 0;53}5455static void56emit_flush(struct i915_context *i915)57{58/* Cache handling is very cheap atm. State handling can request to flushes:59* - I915_FLUSH_CACHE which is a flush everything request and60* - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush.61* Because the cache handling is so dumb, no explicit "invalidate map cache".62* Also, the first is a strict superset of the latter, so the following logic63* works. */64if (i915->flush_dirty & I915_FLUSH_CACHE)65OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE);66else if (i915->flush_dirty & I915_PIPELINE_FLUSH)67OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE);68}6970uint32_t invariant_state[] = {71_3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 |72AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0,7374_3DSTATE_DFLT_DIFFUSE_CMD, 0,7576_3DSTATE_DFLT_SPEC_CMD, 0,7778_3DSTATE_DFLT_Z_CMD, 0,7980_3DSTATE_COORD_SET_BINDINGS | CSB_TCB(0, 0) | CSB_TCB(1, 1) | CSB_TCB(2, 2) |81CSB_TCB(3, 3) | CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) |82CSB_TCB(7, 7),8384_3DSTATE_RASTER_RULES_CMD | ENABLE_POINT_RASTER_RULE |85OGL_POINT_RASTER_RULE | ENABLE_LINE_STRIP_PROVOKE_VRTX |86ENABLE_TRI_FAN_PROVOKE_VRTX | LINE_STRIP_PROVOKE_VRTX(1) |87TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D,8889_3DSTATE_DEPTH_SUBRECT_DISABLE,9091/* disable indirect state for now92*/93_3DSTATE_LOAD_INDIRECT | 0, 0};9495static void96emit_invariant(struct i915_context *i915)97{98i915_winsys_batchbuffer_write(99i915->batch, invariant_state,100ARRAY_SIZE(invariant_state) * sizeof(uint32_t));101}102103static void104validate_immediate(struct i915_context *i915, unsigned *batch_space)105{106unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |1071 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |1081 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |1091 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &110i915->immediate_dirty;111112if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)113i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;114115*batch_space = 1 + util_bitcount(dirty);116}117118static void119emit_immediate_s5(struct i915_context *i915, uint32_t imm)120{121struct i915_surface *surf = i915_surface(i915->framebuffer.cbufs[0]);122123if (surf) {124uint32_t writemask = imm & S5_WRITEDISABLE_MASK;125imm &= ~S5_WRITEDISABLE_MASK;126127/* The register bits are not in order. */128static const uint32_t writedisables[4] = {129S5_WRITEDISABLE_RED,130S5_WRITEDISABLE_GREEN,131S5_WRITEDISABLE_BLUE,132S5_WRITEDISABLE_ALPHA,133};134135for (int i = 0; i < 4; i++) {136if (writemask & writedisables[surf->color_swizzle[i]])137imm |= writedisables[i];138}139}140141OUT_BATCH(imm);142}143144static void145emit_immediate(struct i915_context *i915)146{147/* remove unwanted bits and S7 */148unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |1491 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |1501 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 |1511 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) &152i915->immediate_dirty;153int i, num = util_bitcount(dirty);154assert(num && num <= I915_MAX_IMMEDIATE);155156OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | dirty << 4 | (num - 1));157158if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) {159if (i915->vbo)160OUT_RELOC(i915->vbo, I915_USAGE_VERTEX,161i915->current.immediate[I915_IMMEDIATE_S0]);162else163OUT_BATCH(0);164}165166for (i = 1; i < I915_MAX_IMMEDIATE; i++) {167if (dirty & (1 << i)) {168if (i == I915_IMMEDIATE_S5)169emit_immediate_s5(i915, i915->current.immediate[i]);170else171OUT_BATCH(i915->current.immediate[i]);172}173}174}175176static void177validate_dynamic(struct i915_context *i915, unsigned *batch_space)178{179*batch_space =180util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1));181}182183static void184emit_dynamic(struct i915_context *i915)185{186int i;187for (i = 0; i < I915_MAX_DYNAMIC; i++) {188if (i915->dynamic_dirty & (1 << i))189OUT_BATCH(i915->current.dynamic[i]);190}191}192193static void194validate_static(struct i915_context *i915, unsigned *batch_space)195{196*batch_space = 0;197198if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {199i915->validation_buffers[i915->num_validation_buffers++] =200i915->current.cbuf_bo;201*batch_space += 3;202}203204if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {205i915->validation_buffers[i915->num_validation_buffers++] =206i915->current.depth_bo;207*batch_space += 3;208}209210if (i915->static_dirty & I915_DST_VARS)211*batch_space += 2;212213if (i915->static_dirty & I915_DST_RECT)214*batch_space += 5;215}216217static void218emit_static(struct i915_context *i915)219{220if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) {221OUT_BATCH(_3DSTATE_BUF_INFO_CMD);222OUT_BATCH(i915->current.cbuf_flags);223OUT_RELOC(i915->current.cbuf_bo, I915_USAGE_RENDER, 0);224}225226/* What happens if no zbuf??227*/228if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) {229OUT_BATCH(_3DSTATE_BUF_INFO_CMD);230OUT_BATCH(i915->current.depth_flags);231OUT_RELOC(i915->current.depth_bo, I915_USAGE_RENDER, 0);232}233234if (i915->static_dirty & I915_DST_VARS) {235OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);236OUT_BATCH(i915->current.dst_buf_vars);237}238}239240static void241validate_map(struct i915_context *i915, unsigned *batch_space)242{243const uint32_t enabled = i915->current.sampler_enable_flags;244uint32_t unit;245struct i915_texture *tex;246247*batch_space = i915->current.sampler_enable_nr248? 2 + 3 * i915->current.sampler_enable_nr249: 0;250251for (unit = 0; unit < I915_TEX_UNITS; unit++) {252if (enabled & (1 << unit)) {253tex = i915_texture(i915->fragment_sampler_views[unit]->texture);254i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer;255}256}257}258259static void260emit_map(struct i915_context *i915)261{262const uint32_t nr = i915->current.sampler_enable_nr;263if (nr) {264const uint32_t enabled = i915->current.sampler_enable_flags;265uint32_t unit;266uint32_t count = 0;267OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));268OUT_BATCH(enabled);269for (unit = 0; unit < I915_TEX_UNITS; unit++) {270if (enabled & (1 << unit)) {271struct i915_texture *texture =272i915_texture(i915->fragment_sampler_views[unit]->texture);273struct i915_winsys_buffer *buf = texture->buffer;274unsigned offset = i915->current.texbuffer[unit][2];275276assert(buf);277278count++;279280OUT_RELOC(buf, I915_USAGE_SAMPLER, offset);281OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */282OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */283}284}285assert(count == nr);286}287}288289static void290validate_sampler(struct i915_context *i915, unsigned *batch_space)291{292*batch_space = i915->current.sampler_enable_nr293? 2 + 3 * i915->current.sampler_enable_nr294: 0;295}296297static void298emit_sampler(struct i915_context *i915)299{300if (i915->current.sampler_enable_nr) {301int i;302303OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * i915->current.sampler_enable_nr));304305OUT_BATCH(i915->current.sampler_enable_flags);306307for (i = 0; i < I915_TEX_UNITS; i++) {308if (i915->current.sampler_enable_flags & (1 << i)) {309OUT_BATCH(i915->current.sampler[i][0]);310OUT_BATCH(i915->current.sampler[i][1]);311OUT_BATCH(i915->current.sampler[i][2]);312}313}314}315}316317static void318validate_constants(struct i915_context *i915, unsigned *batch_space)319{320int nr = i915->fs->num_constants ? 2 + 4 * i915->fs->num_constants : 0;321322*batch_space = nr;323}324325static void326emit_constants(struct i915_context *i915)327{328/* Collate the user-defined constants with the fragment shader's329* immediates according to the constant_flags[] array.330*/331const uint32_t nr = i915->fs->num_constants;332333assert(nr <= I915_MAX_CONSTANT);334if (nr) {335uint32_t i;336337OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4));338OUT_BATCH((1 << nr) - 1);339340for (i = 0; i < nr; i++) {341const uint32_t *c;342if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {343/* grab user-defined constant */344c = (uint32_t *)i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])345->data;346c += 4 * i;347} else {348/* emit program constant */349c = (uint32_t *)i915->fs->constants[i];350}351#if 0 /* debug */352{353float *f = (float *) c;354printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],355(i915->fs->constant_flags[i] == I915_CONSTFLAG_USER356? "user" : "immediate"));357}358#endif359OUT_BATCH(*c++);360OUT_BATCH(*c++);361OUT_BATCH(*c++);362OUT_BATCH(*c++);363}364}365}366367static void368validate_program(struct i915_context *i915, unsigned *batch_space)369{370/* we need more batch space if we want to emulate rgba framebuffers */371*batch_space = i915->fs->program_len + (i915->current.fixup_swizzle ? 3 : 0);372}373374static void375emit_program(struct i915_context *i915)376{377/* we should always have, at least, a pass-through program */378assert(i915->fs->program_len > 0);379380/* If we're doing a fixup swizzle, that's 3 more dwords to add. */381uint32_t additional_size = 0;382if (i915->current.fixup_swizzle)383additional_size = 3;384385/* output the program: 1 dword of header, then 3 dwords per decl/instruction */386assert(i915->fs->program_len % 3 == 1);387388/* first word has the size, adjust it for fixup swizzle */389OUT_BATCH(i915->fs->program[0] + additional_size);390391for (int i = 1; i < i915->fs->program_len; i++)392OUT_BATCH(i915->fs->program[i]);393394/* we emit an additional mov with swizzle to fake RGBA framebuffers */395if (i915->current.fixup_swizzle) {396/* mov out_color, out_color.zyxw */397OUT_BATCH(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |398A0_DEST_CHANNEL_ALL | (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) |399(T_DIFFUSE << A0_SRC0_NR_SHIFT));400OUT_BATCH(i915->current.fixup_swizzle);401OUT_BATCH(0);402}403}404405static void406emit_draw_rect(struct i915_context *i915)407{408if (i915->static_dirty & I915_DST_RECT) {409OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);410OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS);411OUT_BATCH(i915->current.draw_offset);412OUT_BATCH(i915->current.draw_size);413OUT_BATCH(i915->current.draw_offset);414}415}416417static bool418i915_validate_state(struct i915_context *i915, unsigned *batch_space)419{420unsigned tmp;421422i915->num_validation_buffers = 0;423if (i915->hardware_dirty & I915_HW_INVARIANT)424*batch_space = ARRAY_SIZE(invariant_state);425else426*batch_space = 0;427428#if 0429static int counter_total = 0;430#define VALIDATE_ATOM(atom, hw_dirty) \431if (i915->hardware_dirty & hw_dirty) { \432static int counter_##atom = 0; \433validate_##atom(i915, &tmp); \434*batch_space += tmp; \435counter_##atom += tmp; \436counter_total += tmp; \437printf("%s: \t%d/%d \t%2.2f\n", #atom, counter_##atom, counter_total, \438counter_##atom * 100.f / counter_total); \439}440#else441#define VALIDATE_ATOM(atom, hw_dirty) \442if (i915->hardware_dirty & hw_dirty) { \443validate_##atom(i915, &tmp); \444*batch_space += tmp; \445}446#endif447VALIDATE_ATOM(flush, I915_HW_FLUSH);448VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE);449VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC);450VALIDATE_ATOM(static, I915_HW_STATIC);451VALIDATE_ATOM(map, I915_HW_MAP);452VALIDATE_ATOM(sampler, I915_HW_SAMPLER);453VALIDATE_ATOM(constants, I915_HW_CONSTANTS);454VALIDATE_ATOM(program, I915_HW_PROGRAM);455#undef VALIDATE_ATOM456457if (i915->num_validation_buffers == 0)458return true;459460if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers,461i915->num_validation_buffers))462return false;463464return true;465}466467/* Push the state into the sarea and/or texture memory.468*/469void470i915_emit_hardware_state(struct i915_context *i915)471{472unsigned batch_space;473uintptr_t save_ptr;474475assert(i915->dirty == 0);476477if (I915_DBG_ON(DBG_ATOMS))478i915_dump_hardware_dirty(i915, __FUNCTION__);479480if (!i915_validate_state(i915, &batch_space)) {481FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);482assert(i915_validate_state(i915, &batch_space));483}484485if (!BEGIN_BATCH(batch_space)) {486FLUSH_BATCH(NULL, I915_FLUSH_ASYNC);487assert(i915_validate_state(i915, &batch_space));488assert(BEGIN_BATCH(batch_space));489}490491save_ptr = (uintptr_t)i915->batch->ptr;492493#define EMIT_ATOM(atom, hw_dirty) \494if (i915->hardware_dirty & hw_dirty) \495emit_##atom(i915);496EMIT_ATOM(flush, I915_HW_FLUSH);497EMIT_ATOM(invariant, I915_HW_INVARIANT);498EMIT_ATOM(immediate, I915_HW_IMMEDIATE);499EMIT_ATOM(dynamic, I915_HW_DYNAMIC);500EMIT_ATOM(static, I915_HW_STATIC);501EMIT_ATOM(map, I915_HW_MAP);502EMIT_ATOM(sampler, I915_HW_SAMPLER);503EMIT_ATOM(constants, I915_HW_CONSTANTS);504EMIT_ATOM(program, I915_HW_PROGRAM);505EMIT_ATOM(draw_rect, I915_HW_STATIC);506#undef EMIT_ATOM507508I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__,509((uintptr_t)i915->batch->ptr - save_ptr) / 4, batch_space);510assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space);511512i915->hardware_dirty = 0;513i915->immediate_dirty = 0;514i915->dynamic_dirty = 0;515i915->static_dirty = 0;516i915->flush_dirty = 0;517}518519520