/* Path: blob/21.2-virgl/src/gallium/drivers/v3d/v3dx_emit.c (4570 views) */
/*1* Copyright © 2014-2017 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "util/format/u_format.h"24#include "util/half_float.h"25#include "v3d_context.h"26#include "broadcom/common/v3d_macros.h"27#include "broadcom/cle/v3dx_pack.h"28#include "broadcom/compiler/v3d_compiler.h"2930static uint8_t31v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)32{33/* We may get a bad blendfactor when blending is disabled. 
*/34if (factor == 0)35return V3D_BLEND_FACTOR_ZERO;3637switch (factor) {38case PIPE_BLENDFACTOR_ZERO:39return V3D_BLEND_FACTOR_ZERO;40case PIPE_BLENDFACTOR_ONE:41return V3D_BLEND_FACTOR_ONE;42case PIPE_BLENDFACTOR_SRC_COLOR:43return V3D_BLEND_FACTOR_SRC_COLOR;44case PIPE_BLENDFACTOR_INV_SRC_COLOR:45return V3D_BLEND_FACTOR_INV_SRC_COLOR;46case PIPE_BLENDFACTOR_DST_COLOR:47return V3D_BLEND_FACTOR_DST_COLOR;48case PIPE_BLENDFACTOR_INV_DST_COLOR:49return V3D_BLEND_FACTOR_INV_DST_COLOR;50case PIPE_BLENDFACTOR_SRC_ALPHA:51return V3D_BLEND_FACTOR_SRC_ALPHA;52case PIPE_BLENDFACTOR_INV_SRC_ALPHA:53return V3D_BLEND_FACTOR_INV_SRC_ALPHA;54case PIPE_BLENDFACTOR_DST_ALPHA:55return (dst_alpha_one ?56V3D_BLEND_FACTOR_ONE :57V3D_BLEND_FACTOR_DST_ALPHA);58case PIPE_BLENDFACTOR_INV_DST_ALPHA:59return (dst_alpha_one ?60V3D_BLEND_FACTOR_ZERO :61V3D_BLEND_FACTOR_INV_DST_ALPHA);62case PIPE_BLENDFACTOR_CONST_COLOR:63return V3D_BLEND_FACTOR_CONST_COLOR;64case PIPE_BLENDFACTOR_INV_CONST_COLOR:65return V3D_BLEND_FACTOR_INV_CONST_COLOR;66case PIPE_BLENDFACTOR_CONST_ALPHA:67return V3D_BLEND_FACTOR_CONST_ALPHA;68case PIPE_BLENDFACTOR_INV_CONST_ALPHA:69return V3D_BLEND_FACTOR_INV_CONST_ALPHA;70case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:71return (dst_alpha_one ?72V3D_BLEND_FACTOR_ZERO :73V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);74default:75unreachable("Bad blend factor");76}77}7879static inline uint16_t80swizzled_border_color(const struct v3d_device_info *devinfo,81struct pipe_sampler_state *sampler,82struct v3d_sampler_view *sview,83int chan)84{85const struct util_format_description *desc =86util_format_description(sview->base.format);87uint8_t swiz = chan;8889/* If we're doing swizzling in the sampler, then only rearrange the90* border color for the mismatch between the V3D texture format and91* the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by92* the sampler's swizzle.93*94* For swizzling in the shader, we don't do any pre-swizzling of the95* border color.96*/97if 
(v3d_get_tex_return_size(devinfo, sview->base.format,98sampler->compare_mode) != 32)99swiz = desc->swizzle[swiz];100101switch (swiz) {102case PIPE_SWIZZLE_0:103return _mesa_float_to_half(0.0);104case PIPE_SWIZZLE_1:105return _mesa_float_to_half(1.0);106default:107return _mesa_float_to_half(sampler->border_color.f[swiz]);108}109}110111#if V3D_VERSION < 40112static uint32_t113translate_swizzle(unsigned char pipe_swizzle)114{115switch (pipe_swizzle) {116case PIPE_SWIZZLE_0:117return 0;118case PIPE_SWIZZLE_1:119return 1;120case PIPE_SWIZZLE_X:121case PIPE_SWIZZLE_Y:122case PIPE_SWIZZLE_Z:123case PIPE_SWIZZLE_W:124return 2 + pipe_swizzle;125default:126unreachable("unknown swizzle");127}128}129130static void131emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,132int i)133{134struct v3d_job *job = v3d->job;135struct pipe_sampler_state *psampler = stage_tex->samplers[i];136struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);137struct pipe_sampler_view *psview = stage_tex->textures[i];138struct v3d_sampler_view *sview = v3d_sampler_view(psview);139struct pipe_resource *prsc = psview->texture;140struct v3d_resource *rsc = v3d_resource(prsc);141const struct v3d_device_info *devinfo = &v3d->screen->devinfo;142143stage_tex->texture_state[i].offset =144v3d_cl_ensure_space(&job->indirect,145cl_packet_length(TEXTURE_SHADER_STATE),14632);147v3d_bo_set_reference(&stage_tex->texture_state[i].bo,148job->indirect.bo);149150uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,151psampler->compare_mode);152153struct V3D33_TEXTURE_SHADER_STATE unpacked = {154/* XXX */155.border_color_red = swizzled_border_color(devinfo, psampler,156sview, 0),157.border_color_green = swizzled_border_color(devinfo, psampler,158sview, 1),159.border_color_blue = swizzled_border_color(devinfo, psampler,160sview, 2),161.border_color_alpha = swizzled_border_color(devinfo, psampler,162sview, 3),163164/* In the normal texturing path, the LOD gets clamped 
between165* min/max, and the base_level field (set in the sampler view166* from first_level) only decides where the min/mag switch167* happens, so we need to use the LOD clamps to keep us168* between min and max.169*170* For txf, the LOD clamp is still used, despite GL not171* wanting that. We will need to have a separate172* TEXTURE_SHADER_STATE that ignores psview->min/max_lod to173* support txf properly.174*/175.min_level_of_detail = MIN2(psview->u.tex.first_level +176MAX2(psampler->min_lod, 0),177psview->u.tex.last_level),178.max_level_of_detail = MIN2(psview->u.tex.first_level +179MAX2(psampler->max_lod,180psampler->min_lod),181psview->u.tex.last_level),182183.texture_base_pointer = cl_address(rsc->bo,184rsc->slices[0].offset),185186.output_32_bit = return_size == 32,187};188189/* Set up the sampler swizzle if we're doing 16-bit sampling. For190* 32-bit, we leave swizzling up to the shader compiler.191*192* Note: Contrary to the docs, the swizzle still applies even if the193* return size is 32. 
It's just that you probably want to swizzle in194* the shader, because you need the Y/Z/W channels to be defined.195*/196if (return_size == 32) {197unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);198unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);199unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);200unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);201} else {202unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);203unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);204unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);205unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);206}207208int min_img_filter = psampler->min_img_filter;209int min_mip_filter = psampler->min_mip_filter;210int mag_img_filter = psampler->mag_img_filter;211212if (return_size == 32) {213min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;214min_img_filter = PIPE_TEX_FILTER_NEAREST;215mag_img_filter = PIPE_TEX_FILTER_NEAREST;216}217218bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;219switch (min_mip_filter) {220case PIPE_TEX_MIPFILTER_NONE:221unpacked.filter += min_nearest ? 2 : 0;222break;223case PIPE_TEX_MIPFILTER_NEAREST:224unpacked.filter += min_nearest ? 4 : 8;225break;226case PIPE_TEX_MIPFILTER_LINEAR:227unpacked.filter += min_nearest ? 
4 : 8;228unpacked.filter += 2;229break;230}231232if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)233unpacked.filter++;234235if (psampler->max_anisotropy > 8)236unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;237else if (psampler->max_anisotropy > 4)238unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;239else if (psampler->max_anisotropy > 2)240unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;241else if (psampler->max_anisotropy)242unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;243244uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];245cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);246247for (int i = 0; i < ARRAY_SIZE(packed); i++)248packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];249250/* TMU indirect structs need to be 32b aligned. */251v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);252cl_emit_prepacked(&job->indirect, &packed);253}254255static void256emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)257{258for (int i = 0; i < stage_tex->num_textures; i++) {259if (stage_tex->textures[i])260emit_one_texture(v3d, stage_tex, i);261}262}263#endif /* V3D_VERSION < 40 */264265static uint32_t266translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)267{268if (v3d->swap_color_rb & (1 << rt)) {269colormask = ((colormask & (2 | 8)) |270((colormask & 1) << 2) |271((colormask & 4) >> 2));272}273274return (~colormask) & 0xf;275}276277static void278emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,279struct pipe_blend_state *blend, int rt)280{281struct pipe_rt_blend_state *rtblend = &blend->rt[rt];282283#if V3D_VERSION >= 40284/* We don't need to emit blend state for disabled RTs. 
*/285if (!rtblend->blend_enable)286return;287#endif288289cl_emit(&job->bcl, BLEND_CFG, config) {290#if V3D_VERSION >= 40291if (blend->independent_blend_enable)292config.render_target_mask = 1 << rt;293else294config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1;295#else296assert(rt == 0);297#endif298299config.color_blend_mode = rtblend->rgb_func;300config.color_blend_dst_factor =301v3d_factor(rtblend->rgb_dst_factor,302v3d->blend_dst_alpha_one);303config.color_blend_src_factor =304v3d_factor(rtblend->rgb_src_factor,305v3d->blend_dst_alpha_one);306307config.alpha_blend_mode = rtblend->alpha_func;308config.alpha_blend_dst_factor =309v3d_factor(rtblend->alpha_dst_factor,310v3d->blend_dst_alpha_one);311config.alpha_blend_src_factor =312v3d_factor(rtblend->alpha_src_factor,313v3d->blend_dst_alpha_one);314}315}316317static void318emit_flat_shade_flags(struct v3d_job *job,319int varying_offset,320uint32_t varyings,321enum V3DX(Varying_Flags_Action) lower,322enum V3DX(Varying_Flags_Action) higher)323{324cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {325flags.varying_offset_v0 = varying_offset;326flags.flat_shade_flags_for_varyings_v024 = varyings;327flags.action_for_flat_shade_flags_of_lower_numbered_varyings =328lower;329flags.action_for_flat_shade_flags_of_higher_numbered_varyings =330higher;331}332}333334#if V3D_VERSION >= 40335static void336emit_noperspective_flags(struct v3d_job *job,337int varying_offset,338uint32_t varyings,339enum V3DX(Varying_Flags_Action) lower,340enum V3DX(Varying_Flags_Action) higher)341{342cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {343flags.varying_offset_v0 = varying_offset;344flags.non_perspective_flags_for_varyings_v024 = varyings;345flags.action_for_non_perspective_flags_of_lower_numbered_varyings =346lower;347flags.action_for_non_perspective_flags_of_higher_numbered_varyings =348higher;349}350}351352static void353emit_centroid_flags(struct v3d_job *job,354int varying_offset,355uint32_t varyings,356enum 
V3DX(Varying_Flags_Action) lower,357enum V3DX(Varying_Flags_Action) higher)358{359cl_emit(&job->bcl, CENTROID_FLAGS, flags) {360flags.varying_offset_v0 = varying_offset;361flags.centroid_flags_for_varyings_v024 = varyings;362flags.action_for_centroid_flags_of_lower_numbered_varyings =363lower;364flags.action_for_centroid_flags_of_higher_numbered_varyings =365higher;366}367}368#endif /* V3D_VERSION >= 40 */369370static bool371emit_varying_flags(struct v3d_job *job, uint32_t *flags,372void (*flag_emit_callback)(struct v3d_job *job,373int varying_offset,374uint32_t flags,375enum V3DX(Varying_Flags_Action) lower,376enum V3DX(Varying_Flags_Action) higher))377{378struct v3d_context *v3d = job->v3d;379bool emitted_any = false;380381for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {382if (!flags[i])383continue;384385if (emitted_any) {386flag_emit_callback(job, i, flags[i],387V3D_VARYING_FLAGS_ACTION_UNCHANGED,388V3D_VARYING_FLAGS_ACTION_UNCHANGED);389} else if (i == 0) {390flag_emit_callback(job, i, flags[i],391V3D_VARYING_FLAGS_ACTION_UNCHANGED,392V3D_VARYING_FLAGS_ACTION_ZEROED);393} else {394flag_emit_callback(job, i, flags[i],395V3D_VARYING_FLAGS_ACTION_ZEROED,396V3D_VARYING_FLAGS_ACTION_ZEROED);397}398emitted_any = true;399}400401return emitted_any;402}403404static inline struct v3d_uncompiled_shader *405get_tf_shader(struct v3d_context *v3d)406{407if (v3d->prog.bind_gs)408return v3d->prog.bind_gs;409else410return v3d->prog.bind_vs;411}412413void414v3dX(emit_state)(struct pipe_context *pctx)415{416struct v3d_context *v3d = v3d_context(pctx);417struct v3d_job *job = v3d->job;418bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;419420if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |421V3D_DIRTY_RASTERIZER)) {422float *vpscale = v3d->viewport.scale;423float *vptranslate = v3d->viewport.translate;424float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];425float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];426float 
vp_miny = -fabsf(vpscale[1]) + vptranslate[1];427float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];428429/* Clip to the scissor if it's enabled, but still clip to the430* drawable regardless since that controls where the binner431* tries to put things.432*433* Additionally, always clip the rendering to the viewport,434* since the hardware does guardband clipping, meaning435* primitives would rasterize outside of the view volume.436*/437uint32_t minx, miny, maxx, maxy;438if (!v3d->rasterizer->base.scissor) {439minx = MAX2(vp_minx, 0);440miny = MAX2(vp_miny, 0);441maxx = MIN2(vp_maxx, job->draw_width);442maxy = MIN2(vp_maxy, job->draw_height);443} else {444minx = MAX2(vp_minx, v3d->scissor.minx);445miny = MAX2(vp_miny, v3d->scissor.miny);446maxx = MIN2(vp_maxx, v3d->scissor.maxx);447maxy = MIN2(vp_maxy, v3d->scissor.maxy);448}449450cl_emit(&job->bcl, CLIP_WINDOW, clip) {451clip.clip_window_left_pixel_coordinate = minx;452clip.clip_window_bottom_pixel_coordinate = miny;453if (maxx > minx && maxy > miny) {454clip.clip_window_width_in_pixels = maxx - minx;455clip.clip_window_height_in_pixels = maxy - miny;456} else if (V3D_VERSION < 41) {457/* The HW won't entirely clip out when scissor458* w/h is 0. 
Just treat it the same as459* rasterizer discard.460*/461rasterizer_discard = true;462clip.clip_window_width_in_pixels = 1;463clip.clip_window_height_in_pixels = 1;464}465}466467job->draw_min_x = MIN2(job->draw_min_x, minx);468job->draw_min_y = MIN2(job->draw_min_y, miny);469job->draw_max_x = MAX2(job->draw_max_x, maxx);470job->draw_max_y = MAX2(job->draw_max_y, maxy);471472if (!v3d->rasterizer->base.scissor) {473job->scissor.disabled = true;474} else if (!job->scissor.disabled &&475(v3d->dirty & V3D_DIRTY_SCISSOR)) {476if (job->scissor.count < MAX_JOB_SCISSORS) {477job->scissor.rects[job->scissor.count].min_x =478v3d->scissor.minx;479job->scissor.rects[job->scissor.count].min_y =480v3d->scissor.miny;481job->scissor.rects[job->scissor.count].max_x =482v3d->scissor.maxx - 1;483job->scissor.rects[job->scissor.count].max_y =484v3d->scissor.maxy - 1;485job->scissor.count++;486} else {487job->scissor.disabled = true;488perf_debug("Too many scissor rects.");489}490}491}492493if (v3d->dirty & (V3D_DIRTY_RASTERIZER |494V3D_DIRTY_ZSA |495V3D_DIRTY_BLEND |496V3D_DIRTY_COMPILED_FS)) {497cl_emit(&job->bcl, CFG_BITS, config) {498config.enable_forward_facing_primitive =499!rasterizer_discard &&500!(v3d->rasterizer->base.cull_face &501PIPE_FACE_FRONT);502config.enable_reverse_facing_primitive =503!rasterizer_discard &&504!(v3d->rasterizer->base.cull_face &505PIPE_FACE_BACK);506/* This seems backwards, but it's what gets the507* clipflat test to pass.508*/509config.clockwise_primitives =510v3d->rasterizer->base.front_ccw;511512config.enable_depth_offset =513v3d->rasterizer->base.offset_tri;514515/* V3D follows GL behavior where the sample mask only516* applies when MSAA is enabled. Gallium has sample517* mask apply anyway, and the MSAA blit shaders will518* set sample mask without explicitly setting519* rasterizer oversample. 
Just force it on here,520* since the blit shaders are the only way to have521* !multisample && samplemask != 0xf.522*/523config.rasterizer_oversample_mode =524v3d->rasterizer->base.multisample ||525v3d->sample_mask != 0xf;526527config.direct3d_provoking_vertex =528v3d->rasterizer->base.flatshade_first;529530config.blend_enable = v3d->blend->blend_enables;531532/* Note: EZ state may update based on the compiled FS,533* along with ZSA534*/535config.early_z_updates_enable =536(job->ez_state != V3D_EZ_DISABLED);537if (v3d->zsa->base.depth_enabled) {538config.z_updates_enable =539v3d->zsa->base.depth_writemask;540config.early_z_enable =541config.early_z_updates_enable;542config.depth_test_function =543v3d->zsa->base.depth_func;544} else {545config.depth_test_function = PIPE_FUNC_ALWAYS;546}547548config.stencil_enable =549v3d->zsa->base.stencil[0].enabled;550551/* Use nicer line caps when line smoothing is552* enabled553*/554config.line_rasterization =555v3d_line_smoothing_enabled(v3d) ? 1 : 0;556}557558}559560if (v3d->dirty & V3D_DIRTY_RASTERIZER &&561v3d->rasterizer->base.offset_tri) {562if (job->zsbuf &&563job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {564cl_emit_prepacked_sized(&job->bcl,565v3d->rasterizer->depth_offset_z16,566cl_packet_length(DEPTH_OFFSET));567} else {568cl_emit_prepacked_sized(&job->bcl,569v3d->rasterizer->depth_offset,570cl_packet_length(DEPTH_OFFSET));571}572}573574if (v3d->dirty & V3D_DIRTY_RASTERIZER) {575cl_emit(&job->bcl, POINT_SIZE, point_size) {576point_size.point_size = v3d->rasterizer->point_size;577}578579cl_emit(&job->bcl, LINE_WIDTH, line_width) {580line_width.line_width = v3d_get_real_line_width(v3d);581}582}583584if (v3d->dirty & V3D_DIRTY_VIEWPORT) {585cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {586clip.viewport_half_width_in_1_256th_of_pixel =587v3d->viewport.scale[0] * 256.0f;588clip.viewport_half_height_in_1_256th_of_pixel =589v3d->viewport.scale[1] * 256.0f;590}591592cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) 
{593clip.viewport_z_offset_zc_to_zs =594v3d->viewport.translate[2];595clip.viewport_z_scale_zc_to_zs =596v3d->viewport.scale[2];597}598cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {599float z1 = (v3d->viewport.translate[2] -600v3d->viewport.scale[2]);601float z2 = (v3d->viewport.translate[2] +602v3d->viewport.scale[2]);603clip.minimum_zw = MIN2(z1, z2);604clip.maximum_zw = MAX2(z1, z2);605}606607cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {608vp.viewport_centre_x_coordinate =609v3d->viewport.translate[0];610vp.viewport_centre_y_coordinate =611v3d->viewport.translate[1];612}613}614615if (v3d->dirty & V3D_DIRTY_BLEND) {616struct v3d_blend_state *blend = v3d->blend;617618if (blend->blend_enables) {619#if V3D_VERSION >= 40620cl_emit(&job->bcl, BLEND_ENABLES, enables) {621enables.mask = blend->blend_enables;622}623#endif624625if (blend->base.independent_blend_enable) {626for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)627emit_rt_blend(v3d, job, &blend->base, i);628} else {629emit_rt_blend(v3d, job, &blend->base, 0);630}631}632}633634if (v3d->dirty & V3D_DIRTY_BLEND) {635struct pipe_blend_state *blend = &v3d->blend->base;636637cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {638for (int i = 0; i < 4; i++) {639int rt = blend->independent_blend_enable ? 
i : 0;640int rt_mask = blend->rt[rt].colormask;641642mask.mask |= translate_colormask(v3d, rt_mask,643i) << (4 * i);644}645}646}647648/* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant649* color.650*/651if (v3d->dirty & V3D_DIRTY_BLEND_COLOR ||652(V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) {653cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {654color.red_f16 = (v3d->swap_color_rb ?655v3d->blend_color.hf[2] :656v3d->blend_color.hf[0]);657color.green_f16 = v3d->blend_color.hf[1];658color.blue_f16 = (v3d->swap_color_rb ?659v3d->blend_color.hf[0] :660v3d->blend_color.hf[2]);661color.alpha_f16 = v3d->blend_color.hf[3];662}663}664665if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {666struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];667struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];668669if (front->enabled) {670cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,671v3d->zsa->stencil_front, config) {672config.stencil_ref_value =673v3d->stencil_ref.ref_value[0];674}675}676677if (back->enabled) {678cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,679v3d->zsa->stencil_back, config) {680config.stencil_ref_value =681v3d->stencil_ref.ref_value[1];682}683}684}685686#if V3D_VERSION < 40687/* Pre-4.x, we have texture state that depends on both the sampler and688* the view, so we merge them together at draw time.689*/690if (v3d->dirty & V3D_DIRTY_FRAGTEX)691emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);692693if (v3d->dirty & V3D_DIRTY_GEOMTEX)694emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);695696if (v3d->dirty & V3D_DIRTY_VERTTEX)697emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);698#endif699700if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {701if (!emit_varying_flags(job,702v3d->prog.fs->prog_data.fs->flat_shade_flags,703emit_flat_shade_flags)) {704cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);705}706}707708#if V3D_VERSION >= 40709if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {710if 
(!emit_varying_flags(job,711v3d->prog.fs->prog_data.fs->noperspective_flags,712emit_noperspective_flags)) {713cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);714}715}716717if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {718if (!emit_varying_flags(job,719v3d->prog.fs->prog_data.fs->centroid_flags,720emit_centroid_flags)) {721cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);722}723}724#endif725726/* Set up the transform feedback data specs (which VPM entries to727* output to which buffers).728*/729if (v3d->dirty & (V3D_DIRTY_STREAMOUT |730V3D_DIRTY_RASTERIZER |731V3D_DIRTY_PRIM_MODE)) {732struct v3d_streamout_stateobj *so = &v3d->streamout;733if (so->num_targets) {734bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&735v3d->rasterizer->base.point_size_per_vertex);736struct v3d_uncompiled_shader *tf_shader =737get_tf_shader(v3d);738uint16_t *tf_specs = (psiz_per_vertex ?739tf_shader->tf_specs_psiz :740tf_shader->tf_specs);741742#if V3D_VERSION >= 40743bool tf_enabled = v3d_transform_feedback_enabled(v3d);744job->tf_enabled |= tf_enabled;745746cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {747tfe.number_of_16_bit_output_data_specs_following =748tf_shader->num_tf_specs;749tfe.enable = tf_enabled;750};751#else /* V3D_VERSION < 40 */752cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {753tfe.number_of_32_bit_output_buffer_address_following =754so->num_targets;755tfe.number_of_16_bit_output_data_specs_following =756tf_shader->num_tf_specs;757};758#endif /* V3D_VERSION < 40 */759for (int i = 0; i < tf_shader->num_tf_specs; i++) {760cl_emit_prepacked(&job->bcl, &tf_specs[i]);761}762} else {763#if V3D_VERSION >= 40764cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {765tfe.enable = false;766};767#endif /* V3D_VERSION >= 40 */768}769}770771/* Set up the transform feedback buffers. 
*/772if (v3d->dirty & V3D_DIRTY_STREAMOUT) {773struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);774struct v3d_streamout_stateobj *so = &v3d->streamout;775for (int i = 0; i < so->num_targets; i++) {776const struct pipe_stream_output_target *target =777so->targets[i];778struct v3d_resource *rsc = target ?779v3d_resource(target->buffer) : NULL;780struct pipe_shader_state *ss = &tf_shader->base;781struct pipe_stream_output_info *info = &ss->stream_output;782uint32_t offset = (v3d->streamout.offsets[i] *783info->stride[i] * 4);784785#if V3D_VERSION >= 40786if (!target)787continue;788789cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {790output.buffer_address =791cl_address(rsc->bo,792target->buffer_offset +793offset);794output.buffer_size_in_32_bit_words =795(target->buffer_size - offset) >> 2;796output.buffer_number = i;797}798#else /* V3D_VERSION < 40 */799cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {800if (target) {801output.address =802cl_address(rsc->bo,803target->buffer_offset +804offset);805}806};807#endif /* V3D_VERSION < 40 */808if (target) {809v3d_job_add_tf_write_resource(v3d->job,810target->buffer);811}812/* XXX: buffer_size? */813}814}815816if (v3d->dirty & V3D_DIRTY_OQ) {817cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {818if (v3d->active_queries && v3d->current_oq) {819counter.address = cl_address(v3d->current_oq, 0);820}821}822}823824#if V3D_VERSION >= 40825if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {826cl_emit(&job->bcl, SAMPLE_STATE, state) {827/* Note: SampleCoverage was handled at the828* frontend level by converting to sample_mask.829*/830state.coverage = 1.0;831state.mask = job->msaa ? v3d->sample_mask : 0xf;832}833}834#endif835}836837838