/* Path: blob/21.2-virgl/src/intel/compiler/brw_clip_util.c
 * (code-viewer page header: 4550 views)
 */
/*1Copyright (C) Intel Corp. 2006. All Rights Reserved.2Intel funded Tungsten Graphics to3develop this 3D driver.45Permission is hereby granted, free of charge, to any person obtaining6a copy of this software and associated documentation files (the7"Software"), to deal in the Software without restriction, including8without limitation the rights to use, copy, modify, merge, publish,9distribute, sublicense, and/or sell copies of the Software, and to10permit persons to whom the Software is furnished to do so, subject to11the following conditions:1213The above copyright notice and this permission notice (including the14next paragraph) shall be included in all copies or substantial15portions of the Software.1617THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.2425**********************************************************************/26/*27* Authors:28* Keith Whitwell <[email protected]>29*/303132#include "main/macros.h"33#include "main/enums.h"34#include "program/program.h"3536#include "brw_clip.h"373839struct brw_reg get_tmp( struct brw_clip_compile *c )40{41struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);4243if (++c->last_tmp > c->prog_data.total_grf)44c->prog_data.total_grf = c->last_tmp;4546return tmp;47}4849static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )50{51if (tmp.nr == c->last_tmp-1)52c->last_tmp--;53}545556static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)57{58return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);59}606162void brw_clip_init_planes( struct brw_clip_compile *c )63{64struct 
brw_codegen *p = &c->func;6566if (!c->key.nr_userclip) {67brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));68brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));69brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));70brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));71brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));72brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));73}74}75767778#define W 37980/* Project 'pos' to screen space (or back again), overwrite with results:81*/82void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )83{84struct brw_codegen *p = &c->func;8586/* calc rhw87*/88brw_math_invert(p, get_element(pos, W), get_element(pos, W));8990/* value.xyz *= value.rhw91*/92brw_set_default_access_mode(p, BRW_ALIGN_16);93brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos,94brw_swizzle(pos, BRW_SWIZZLE_WWWW));95brw_set_default_access_mode(p, BRW_ALIGN_1);96}979899static void brw_clip_project_vertex( struct brw_clip_compile *c,100struct brw_indirect vert_addr )101{102struct brw_codegen *p = &c->func;103struct brw_reg tmp = get_tmp(c);104GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);105GLuint ndc_offset = brw_varying_to_offset(&c->vue_map,106BRW_VARYING_SLOT_NDC);107108/* Fixup position. 
Extract from the original vertex and re-project109* to screen space:110*/111brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));112brw_clip_project_position(c, tmp);113brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);114115release_tmp(c, tmp);116}117118119120121/* Interpolate between two vertices and put the result into a0.0.122* Increment a0.0 accordingly.123*124* Beware that dest_ptr can be equal to v0_ptr!125*/126void brw_clip_interp_vertex( struct brw_clip_compile *c,127struct brw_indirect dest_ptr,128struct brw_indirect v0_ptr, /* from */129struct brw_indirect v1_ptr, /* to */130struct brw_reg t0,131bool force_edgeflag)132{133struct brw_codegen *p = &c->func;134struct brw_reg t_nopersp, v0_ndc_copy;135GLuint slot;136137/* Just copy the vertex header:138*/139/*140* After CLIP stage, only first 256 bits of the VUE are read141* back on Ironlake, so needn't change it142*/143brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);144145146/* First handle the 3D and NDC interpolation, in case we147* need noperspective interpolation. Doing it early has no148* performance impact in any case.149*/150151/* Take a copy of the v0 NDC coordinates, in case dest == v0. 
*/152if (c->key.contains_noperspective_varying) {153GLuint offset = brw_varying_to_offset(&c->vue_map,154BRW_VARYING_SLOT_NDC);155v0_ndc_copy = get_tmp(c);156brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));157}158159/* Compute the new 3D position160*161* dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0162*/163{164GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);165struct brw_reg tmp = get_tmp(c);166brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0);167brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0);168brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp);169release_tmp(c, tmp);170}171172/* Recreate the projected (NDC) coordinate in the new vertex header */173brw_clip_project_vertex(c, dest_ptr);174175/* If we have noperspective attributes,176* we need to compute the screen-space t177*/178if (c->key.contains_noperspective_varying) {179GLuint delta = brw_varying_to_offset(&c->vue_map,180BRW_VARYING_SLOT_NDC);181struct brw_reg tmp = get_tmp(c);182t_nopersp = get_tmp(c);183184/* t_nopersp = vec4(v1.xy, dest.xy) */185brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));186brw_MOV(p, tmp, deref_4f(dest_ptr, delta));187brw_set_default_access_mode(p, BRW_ALIGN_16);188brw_MOV(p,189brw_writemask(t_nopersp, WRITEMASK_ZW),190brw_swizzle(tmp, BRW_SWIZZLE_XYXY));191192/* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */193brw_ADD(p, t_nopersp, t_nopersp,194negate(brw_swizzle(v0_ndc_copy, BRW_SWIZZLE_XYXY)));195196/* Add the absolute values of the X and Y deltas so that if197* the points aren't in the same place on the screen we get198* nonzero values to divide.199*200* After that, we have vert1 - vert0 in t_nopersp.x and201* vertnew - vert0 in t_nopersp.y202*203* t_nopersp = vec2(|v1.x -v0.x| + |v1.y -v0.y|,204* |dest.x-v0.x| + |dest.y-v0.y|)205*/206brw_ADD(p,207brw_writemask(t_nopersp, WRITEMASK_XY),208brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_XZXZ)),209brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_YWYW)));210brw_set_default_access_mode(p, 
BRW_ALIGN_1);211212/* If the points are in the same place, just substitute a213* value to avoid divide-by-zero214*/215brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,216vec1(t_nopersp),217brw_imm_f(0));218brw_IF(p, BRW_EXECUTE_1);219brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0),220brw_float_to_vf(0.0),221brw_float_to_vf(0.0),222brw_float_to_vf(0.0)));223brw_ENDIF(p);224225/* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */226brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));227brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp),228vec1(suboffset(t_nopersp, 1)));229brw_set_default_access_mode(p, BRW_ALIGN_16);230brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, BRW_SWIZZLE_XXXX));231brw_set_default_access_mode(p, BRW_ALIGN_1);232233release_tmp(c, tmp);234release_tmp(c, v0_ndc_copy);235}236237/* Now we can iterate over each attribute238* (could be done in pairs?)239*/240for (slot = 0; slot < c->vue_map.num_slots; slot++) {241int varying = c->vue_map.slot_to_varying[slot];242GLuint delta = brw_vue_slot_to_offset(slot);243244/* HPOS, NDC already handled above */245if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC)246continue;247248249if (varying == VARYING_SLOT_EDGE) {250if (force_edgeflag)251brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));252else253brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));254} else if (varying == VARYING_SLOT_PSIZ) {255/* PSIZ doesn't need interpolation because it isn't used by the256* fragment shader.257*/258} else if (varying < VARYING_SLOT_MAX) {259/* This is a true vertex result (and not a special value for the VUE260* header), so interpolate:261*262* New = attr0 + t*attr1 - t*attr0263*264* Unless the attribute is flat shaded -- in which case just copy265* from one of the sources (doesn't matter which; already copied from pv)266*/267GLuint interp = c->key.interp_mode[slot];268269if (interp != INTERP_MODE_FLAT) {270struct brw_reg tmp = get_tmp(c);271struct brw_reg 
t =272interp == INTERP_MODE_NOPERSPECTIVE ? t_nopersp : t0;273274brw_MUL(p,275vec4(brw_null_reg()),276deref_4f(v1_ptr, delta),277t);278279brw_MAC(p,280tmp,281negate(deref_4f(v0_ptr, delta)),282t);283284brw_ADD(p,285deref_4f(dest_ptr, delta),286deref_4f(v0_ptr, delta),287tmp);288289release_tmp(c, tmp);290}291else {292brw_MOV(p,293deref_4f(dest_ptr, delta),294deref_4f(v0_ptr, delta));295}296}297}298299if (c->vue_map.num_slots % 2) {300GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);301302brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));303}304305if (c->key.contains_noperspective_varying)306release_tmp(c, t_nopersp);307}308309void brw_clip_emit_vue(struct brw_clip_compile *c,310struct brw_indirect vert,311enum brw_urb_write_flags flags,312GLuint header)313{314struct brw_codegen *p = &c->func;315bool allocate = flags & BRW_URB_WRITE_ALLOCATE;316317brw_clip_ff_sync(c);318319/* Any URB entry that is allocated must subsequently be used or discarded,320* so it doesn't make sense to mark EOT and ALLOCATE at the same time.321*/322assert(!(allocate && (flags & BRW_URB_WRITE_EOT)));323324/* Copy the vertex from vertn into m1..mN+1:325*/326brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);327328/* Overwrite PrimType and PrimStart in the message header, for329* each vertex in turn:330*/331brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));332333334/* Send each vertex as a separate write to the urb. This335* is different to the concept in brw_sf_emit.c, where336* subsequent writes are used to build up a single urb337* entry. Each of these writes instantiates a separate338* urb entry - (I think... what about 'allocate'?)339*/340brw_urb_WRITE(p,341allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),3420,343c->reg.R0,344flags,345c->nr_regs + 1, /* msg length */346allocate ? 
1 : 0, /* response_length */3470, /* urb offset */348BRW_URB_SWIZZLE_NONE);349}350351352353void brw_clip_kill_thread(struct brw_clip_compile *c)354{355struct brw_codegen *p = &c->func;356357brw_clip_ff_sync(c);358/* Send an empty message to kill the thread and release any359* allocated urb entry:360*/361brw_urb_WRITE(p,362retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),3630,364c->reg.R0,365BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE,3661, /* msg len */3670, /* response len */3680,369BRW_URB_SWIZZLE_NONE);370}371372373374375struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )376{377return brw_address(c->reg.fixed_planes);378}379380381struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )382{383if (c->key.nr_userclip) {384return brw_imm_uw(16);385}386else {387return brw_imm_uw(4);388}389}390391392/* Distribute flatshaded attributes from provoking vertex prior to393* clipping.394*/395void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c,396GLuint to, GLuint from )397{398struct brw_codegen *p = &c->func;399400for (int i = 0; i < c->vue_map.num_slots; i++) {401if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {402brw_MOV(p,403byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)),404byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i)));405}406}407}408409410411void brw_clip_init_clipmask( struct brw_clip_compile *c )412{413struct brw_codegen *p = &c->func;414struct brw_reg incoming = get_element_ud(c->reg.R0, 2);415416/* Shift so that lowest outcode bit is rightmost:417*/418brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));419420if (c->key.nr_userclip) {421struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);422423/* Rearrange userclip outcodes so that they come directly after424* the fixed plane bits.425*/426if (p->devinfo->ver == 5 || p->devinfo->is_g4x)427brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14));428else429brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));430431brw_SHR(p, tmp, tmp, 
brw_imm_ud(8));432brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);433434release_tmp(c, tmp);435}436}437438void brw_clip_ff_sync(struct brw_clip_compile *c)439{440struct brw_codegen *p = &c->func;441442if (p->devinfo->ver == 5) {443brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));444brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);445brw_IF(p, BRW_EXECUTE_1);446{447brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));448brw_ff_sync(p,449c->reg.R0,4500,451c->reg.R0,4521, /* allocate */4531, /* response length */4540 /* eot */);455}456brw_ENDIF(p);457brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);458}459}460461void brw_clip_init_ff_sync(struct brw_clip_compile *c)462{463struct brw_codegen *p = &c->func;464465if (p->devinfo->ver == 5) {466brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));467}468}469470471