Path: blob/21.2-virgl/src/intel/compiler/brw_compile_sf.c
4550 views
/*1* Copyright © 2006 - 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "brw_compiler.h"24#include "brw_eu.h"2526#include "dev/intel_debug.h"2728struct brw_sf_compile {29struct brw_codegen func;30struct brw_sf_prog_key key;31struct brw_sf_prog_data prog_data;3233struct brw_reg pv;34struct brw_reg det;35struct brw_reg dx0;36struct brw_reg dx2;37struct brw_reg dy0;38struct brw_reg dy2;3940/* z and 1/w passed in seperately:41*/42struct brw_reg z[3];43struct brw_reg inv_w[3];4445/* The vertices:46*/47struct brw_reg vert[3];4849/* Temporaries, allocated after last vertex reg.50*/51struct brw_reg inv_det;52struct brw_reg a1_sub_a0;53struct brw_reg a2_sub_a0;54struct brw_reg tmp;5556struct brw_reg m1Cx;57struct brw_reg m2Cy;58struct brw_reg m3C0;5960GLuint nr_verts;61GLuint nr_attr_regs;62GLuint nr_setup_regs;63int urb_entry_read_offset;6465/** The last known value of the f0.0 flag register. */66unsigned flag_value;6768struct brw_vue_map vue_map;69};7071/**72* Determine the vue slot corresponding to the given half of the given register.73*/74static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,75int half)76{77return (reg + c->urb_entry_read_offset) * 2 + half;78}7980/**81* Determine the varying corresponding to the given half of the given82* register. half=0 means the first half of a register, half=1 means the83* second half.84*/85static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,86int half)87{88int vue_slot = vert_reg_to_vue_slot(c, reg, half);89return c->vue_map.slot_to_varying[vue_slot];90}9192/**93* Determine the register corresponding to the given vue slot94*/95static struct brw_reg get_vue_slot(struct brw_sf_compile *c,96struct brw_reg vert,97int vue_slot)98{99GLuint off = vue_slot / 2 - c->urb_entry_read_offset;100GLuint sub = vue_slot % 2;101102return brw_vec4_grf(vert.nr + off, sub * 4);103}104105/**106* Determine the register corresponding to the given varying.107*/108static struct brw_reg get_varying(struct brw_sf_compile *c,109struct brw_reg vert,110GLuint varying)111{112int vue_slot = c->vue_map.varying_to_slot[varying];113assert (vue_slot >= c->urb_entry_read_offset);114return get_vue_slot(c, vert, vue_slot);115}116117static bool118have_attr(struct brw_sf_compile *c, GLuint attr)119{120return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;121}122123/***********************************************************************124* Twoside lighting125*/126static void copy_bfc( struct brw_sf_compile *c,127struct brw_reg vert )128{129struct brw_codegen *p = &c->func;130GLuint i;131132for (i = 0; i < 2; i++) {133if (have_attr(c, VARYING_SLOT_COL0+i) &&134have_attr(c, VARYING_SLOT_BFC0+i))135brw_MOV(p,136get_varying(c, vert, VARYING_SLOT_COL0+i),137get_varying(c, vert, VARYING_SLOT_BFC0+i));138}139}140141142static void do_twoside_color( struct brw_sf_compile *c )143{144struct brw_codegen *p = &c->func;145GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;146147/* Already done in clip program:148*/149if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)150return;151152/* If the vertex shader provides backface color, do the selection. The VS153* promises to set up the front color if the backface color is provided, but154* it may contain junk if never written to.155*/156if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&157!(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))158return;159160/* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order161* to get all channels active inside the IF. In the clipping code162* we run with NoMask, so it's not an option and we can use163* BRW_EXECUTE_1 for all comparisions.164*/165brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));166brw_IF(p, BRW_EXECUTE_4);167{168switch (c->nr_verts) {169case 3: copy_bfc(c, c->vert[2]); FALLTHROUGH;170case 2: copy_bfc(c, c->vert[1]); FALLTHROUGH;171case 1: copy_bfc(c, c->vert[0]);172}173}174brw_ENDIF(p);175}176177178179/***********************************************************************180* Flat shading181*/182183static void copy_flatshaded_attributes(struct brw_sf_compile *c,184struct brw_reg dst,185struct brw_reg src)186{187struct brw_codegen *p = &c->func;188int i;189190for (i = 0; i < c->vue_map.num_slots; i++) {191if (c->key.interp_mode[i] == INTERP_MODE_FLAT) {192brw_MOV(p,193get_vue_slot(c, dst, i),194get_vue_slot(c, src, i));195}196}197}198199static int count_flatshaded_attributes(struct brw_sf_compile *c)200{201int i;202int count = 0;203204for (i = 0; i < c->vue_map.num_slots; i++)205if (c->key.interp_mode[i] == INTERP_MODE_FLAT)206count++;207208return count;209}210211212213/* Need to use a computed jump to copy flatshaded attributes as the214* vertices are ordered according to y-coordinate before reaching this215* point, so the PV could be anywhere.216*/217static void do_flatshade_triangle( struct brw_sf_compile *c )218{219struct brw_codegen *p = &c->func;220GLuint nr;221GLuint jmpi = 1;222223/* Already done in clip program:224*/225if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)226return;227228if (p->devinfo->ver == 5)229jmpi = 2;230231nr = count_flatshaded_attributes(c);232233brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));234brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);235236copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);237copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);238brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);239240copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);241copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);242brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);243244copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);245copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);246}247248249static void do_flatshade_line( struct brw_sf_compile *c )250{251struct brw_codegen *p = &c->func;252GLuint nr;253GLuint jmpi = 1;254255/* Already done in clip program:256*/257if (c->key.primitive == BRW_SF_PRIM_UNFILLED_TRIS)258return;259260if (p->devinfo->ver == 5)261jmpi = 2;262263nr = count_flatshaded_attributes(c);264265brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));266brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);267copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);268269brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);270copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);271}272273274/***********************************************************************275* Triangle setup.276*/277278279static void alloc_regs( struct brw_sf_compile *c )280{281GLuint reg, i;282283/* Values computed by fixed function unit:284*/285c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);286c->det = brw_vec1_grf(1, 2);287c->dx0 = brw_vec1_grf(1, 3);288c->dx2 = brw_vec1_grf(1, 4);289c->dy0 = brw_vec1_grf(1, 5);290c->dy2 = brw_vec1_grf(1, 6);291292/* z and 1/w passed in seperately:293*/294c->z[0] = brw_vec1_grf(2, 0);295c->inv_w[0] = brw_vec1_grf(2, 1);296c->z[1] = brw_vec1_grf(2, 2);297c->inv_w[1] = brw_vec1_grf(2, 3);298c->z[2] = brw_vec1_grf(2, 4);299c->inv_w[2] = brw_vec1_grf(2, 5);300301/* The vertices:302*/303reg = 3;304for (i = 0; i < c->nr_verts; i++) {305c->vert[i] = brw_vec8_grf(reg, 0);306reg += c->nr_attr_regs;307}308309/* Temporaries, allocated after last vertex reg.310*/311c->inv_det = brw_vec1_grf(reg, 0); reg++;312c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;313c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;314c->tmp = brw_vec8_grf(reg, 0); reg++;315316/* Note grf allocation:317*/318c->prog_data.total_grf = reg;319320321/* Outputs of this program - interpolation coefficients for322* rasterization:323*/324c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);325c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);326c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);327}328329330static void copy_z_inv_w( struct brw_sf_compile *c )331{332struct brw_codegen *p = &c->func;333GLuint i;334335/* Copy both scalars with a single MOV:336*/337for (i = 0; i < c->nr_verts; i++)338brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));339}340341342static void invert_det( struct brw_sf_compile *c)343{344/* Looks like we invert all 8 elements just to get 1/det in345* position 2 !?!346*/347gfx4_math(&c->func,348c->inv_det,349BRW_MATH_FUNCTION_INV,3500,351c->det,352BRW_MATH_PRECISION_FULL);353354}355356357static bool358calculate_masks(struct brw_sf_compile *c,359GLuint reg,360GLushort *pc,361GLushort *pc_persp,362GLushort *pc_linear)363{364bool is_last_attr = (reg == c->nr_setup_regs - 1);365enum glsl_interp_mode interp;366367*pc_persp = 0;368*pc_linear = 0;369*pc = 0xf;370371interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 0)];372if (interp == INTERP_MODE_SMOOTH) {373*pc_linear = 0xf;374*pc_persp = 0xf;375} else if (interp == INTERP_MODE_NOPERSPECTIVE)376*pc_linear = 0xf;377378/* Maybe only processs one attribute on the final round:379*/380if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {381*pc |= 0xf0;382383interp = c->key.interp_mode[vert_reg_to_vue_slot(c, reg, 1)];384if (interp == INTERP_MODE_SMOOTH) {385*pc_linear |= 0xf0;386*pc_persp |= 0xf0;387} else if (interp == INTERP_MODE_NOPERSPECTIVE)388*pc_linear |= 0xf0;389}390391return is_last_attr;392}393394/* Calculates the predicate control for which channels of a reg395* (containing 2 attrs) to do point sprite coordinate replacement on.396*/397static uint16_t398calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)399{400int varying1, varying2;401uint16_t pc = 0;402403varying1 = vert_reg_to_varying(c, reg, 0);404if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {405if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))406pc |= 0x0f;407}408if (varying1 == BRW_VARYING_SLOT_PNTC)409pc |= 0x0f;410411varying2 = vert_reg_to_varying(c, reg, 1);412if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {413if (c->key.point_sprite_coord_replace & (1 << (varying2 -414VARYING_SLOT_TEX0)))415pc |= 0xf0;416}417if (varying2 == BRW_VARYING_SLOT_PNTC)418pc |= 0xf0;419420return pc;421}422423static void424set_predicate_control_flag_value(struct brw_codegen *p,425struct brw_sf_compile *c,426unsigned value)427{428brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);429430if (value != 0xff) {431if (value != c->flag_value) {432brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));433c->flag_value = value;434}435436brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);437}438}439440static void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)441{442struct brw_codegen *p = &c->func;443GLuint i;444445c->flag_value = 0xff;446c->nr_verts = 3;447448if (allocate)449alloc_regs(c);450451invert_det(c);452copy_z_inv_w(c);453454if (c->key.do_twoside_color)455do_twoside_color(c);456457if (c->key.contains_flat_varying)458do_flatshade_triangle(c);459460461for (i = 0; i < c->nr_setup_regs; i++)462{463/* Pair of incoming attributes:464*/465struct brw_reg a0 = offset(c->vert[0], i);466struct brw_reg a1 = offset(c->vert[1], i);467struct brw_reg a2 = offset(c->vert[2], i);468GLushort pc, pc_persp, pc_linear;469bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);470471if (pc_persp)472{473set_predicate_control_flag_value(p, c, pc_persp);474brw_MUL(p, a0, a0, c->inv_w[0]);475brw_MUL(p, a1, a1, c->inv_w[1]);476brw_MUL(p, a2, a2, c->inv_w[2]);477}478479480/* Calculate coefficients for interpolated values:481*/482if (pc_linear)483{484set_predicate_control_flag_value(p, c, pc_linear);485486brw_ADD(p, c->a1_sub_a0, a1, negate(a0));487brw_ADD(p, c->a2_sub_a0, a2, negate(a0));488489/* calculate dA/dx490*/491brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);492brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));493brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);494495/* calculate dA/dy496*/497brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);498brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));499brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);500}501502{503set_predicate_control_flag_value(p, c, pc);504/* start point for interpolation505*/506brw_MOV(p, c->m3C0, a0);507508/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in509* the send instruction:510*/511brw_urb_WRITE(p,512brw_null_reg(),5130,514brw_vec8_grf(0, 0), /* r0, will be copied to m0 */515last ? BRW_URB_WRITE_EOT_COMPLETE516: BRW_URB_WRITE_NO_FLAGS,5174, /* msg len */5180, /* response len */519i*4, /* offset */520BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */521}522}523524brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);525}526527528529static void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)530{531struct brw_codegen *p = &c->func;532GLuint i;533534c->flag_value = 0xff;535c->nr_verts = 2;536537if (allocate)538alloc_regs(c);539540invert_det(c);541copy_z_inv_w(c);542543if (c->key.contains_flat_varying)544do_flatshade_line(c);545546for (i = 0; i < c->nr_setup_regs; i++)547{548/* Pair of incoming attributes:549*/550struct brw_reg a0 = offset(c->vert[0], i);551struct brw_reg a1 = offset(c->vert[1], i);552GLushort pc, pc_persp, pc_linear;553bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);554555if (pc_persp)556{557set_predicate_control_flag_value(p, c, pc_persp);558brw_MUL(p, a0, a0, c->inv_w[0]);559brw_MUL(p, a1, a1, c->inv_w[1]);560}561562/* Calculate coefficients for position, color:563*/564if (pc_linear) {565set_predicate_control_flag_value(p, c, pc_linear);566567brw_ADD(p, c->a1_sub_a0, a1, negate(a0));568569brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);570brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);571572brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);573brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);574}575576{577set_predicate_control_flag_value(p, c, pc);578579/* start point for interpolation580*/581brw_MOV(p, c->m3C0, a0);582583/* Copy m0..m3 to URB.584*/585brw_urb_WRITE(p,586brw_null_reg(),5870,588brw_vec8_grf(0, 0),589last ? BRW_URB_WRITE_EOT_COMPLETE590: BRW_URB_WRITE_NO_FLAGS,5914, /* msg len */5920, /* response len */593i*4, /* urb destination offset */594BRW_URB_SWIZZLE_TRANSPOSE);595}596}597598brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);599}600601static void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)602{603struct brw_codegen *p = &c->func;604GLuint i;605606c->flag_value = 0xff;607c->nr_verts = 1;608609if (allocate)610alloc_regs(c);611612copy_z_inv_w(c);613for (i = 0; i < c->nr_setup_regs; i++)614{615struct brw_reg a0 = offset(c->vert[0], i);616GLushort pc, pc_persp, pc_linear, pc_coord_replace;617bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);618619pc_coord_replace = calculate_point_sprite_mask(c, i);620pc_persp &= ~pc_coord_replace;621622if (pc_persp) {623set_predicate_control_flag_value(p, c, pc_persp);624brw_MUL(p, a0, a0, c->inv_w[0]);625}626627/* Point sprite coordinate replacement: A texcoord with this628* enabled gets replaced with the value (x, y, 0, 1) where x and629* y vary from 0 to 1 across the horizontal and vertical of the630* point.631*/632if (pc_coord_replace) {633set_predicate_control_flag_value(p, c, pc_coord_replace);634/* Caculate 1.0/PointWidth */635gfx4_math(&c->func,636c->tmp,637BRW_MATH_FUNCTION_INV,6380,639c->dx0,640BRW_MATH_PRECISION_FULL);641642brw_set_default_access_mode(p, BRW_ALIGN_16);643644/* dA/dx, dA/dy */645brw_MOV(p, c->m1Cx, brw_imm_f(0.0));646brw_MOV(p, c->m2Cy, brw_imm_f(0.0));647brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);648if (c->key.sprite_origin_lower_left) {649brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));650} else {651brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);652}653654/* attribute constant offset */655brw_MOV(p, c->m3C0, brw_imm_f(0.0));656if (c->key.sprite_origin_lower_left) {657brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));658} else {659brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));660}661662brw_set_default_access_mode(p, BRW_ALIGN_1);663}664665if (pc & ~pc_coord_replace) {666set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);667brw_MOV(p, c->m1Cx, brw_imm_ud(0));668brw_MOV(p, c->m2Cy, brw_imm_ud(0));669brw_MOV(p, c->m3C0, a0); /* constant value */670}671672673set_predicate_control_flag_value(p, c, pc);674/* Copy m0..m3 to URB. */675brw_urb_WRITE(p,676brw_null_reg(),6770,678brw_vec8_grf(0, 0),679last ? BRW_URB_WRITE_EOT_COMPLETE680: BRW_URB_WRITE_NO_FLAGS,6814, /* msg len */6820, /* response len */683i*4, /* urb destination offset */684BRW_URB_SWIZZLE_TRANSPOSE);685}686687brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);688}689690/* Points setup - several simplifications as all attributes are691* constant across the face of the point (point sprites excluded!)692*/693static void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)694{695struct brw_codegen *p = &c->func;696GLuint i;697698c->flag_value = 0xff;699c->nr_verts = 1;700701if (allocate)702alloc_regs(c);703704copy_z_inv_w(c);705706brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */707brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */708709for (i = 0; i < c->nr_setup_regs; i++)710{711struct brw_reg a0 = offset(c->vert[0], i);712GLushort pc, pc_persp, pc_linear;713bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);714715if (pc_persp)716{717/* This seems odd as the values are all constant, but the718* fragment shader will be expecting it:719*/720set_predicate_control_flag_value(p, c, pc_persp);721brw_MUL(p, a0, a0, c->inv_w[0]);722}723724725/* The delta values are always zero, just send the starting726* coordinate. Again, this is to fit in with the interpolation727* code in the fragment shader.728*/729{730set_predicate_control_flag_value(p, c, pc);731732brw_MOV(p, c->m3C0, a0); /* constant value */733734/* Copy m0..m3 to URB.735*/736brw_urb_WRITE(p,737brw_null_reg(),7380,739brw_vec8_grf(0, 0),740last ? BRW_URB_WRITE_EOT_COMPLETE741: BRW_URB_WRITE_NO_FLAGS,7424, /* msg len */7430, /* response len */744i*4, /* urb destination offset */745BRW_URB_SWIZZLE_TRANSPOSE);746}747}748749brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);750}751752static void brw_emit_anyprim_setup( struct brw_sf_compile *c )753{754struct brw_codegen *p = &c->func;755struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);756struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);757struct brw_reg primmask;758int jmp;759struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));760761c->nr_verts = 3;762alloc_regs(c);763764primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);765766brw_MOV(p, primmask, brw_imm_ud(1));767brw_SHL(p, primmask, primmask, payload_prim);768769brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |770(1<<_3DPRIM_TRISTRIP) |771(1<<_3DPRIM_TRIFAN) |772(1<<_3DPRIM_TRISTRIP_REVERSE) |773(1<<_3DPRIM_POLYGON) |774(1<<_3DPRIM_RECTLIST) |775(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));776brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);777jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;778brw_emit_tri_setup(c, false);779brw_land_fwd_jump(p, jmp);780781brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |782(1<<_3DPRIM_LINESTRIP) |783(1<<_3DPRIM_LINELOOP) |784(1<<_3DPRIM_LINESTRIP_CONT) |785(1<<_3DPRIM_LINESTRIP_BF) |786(1<<_3DPRIM_LINESTRIP_CONT_BF)));787brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);788jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;789brw_emit_line_setup(c, false);790brw_land_fwd_jump(p, jmp);791792brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));793brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);794jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;795brw_emit_point_sprite_setup(c, false);796brw_land_fwd_jump(p, jmp);797798brw_emit_point_setup( c, false );799}800801const unsigned *802brw_compile_sf(const struct brw_compiler *compiler,803void *mem_ctx,804const struct brw_sf_prog_key *key,805struct brw_sf_prog_data *prog_data,806struct brw_vue_map *vue_map,807unsigned *final_assembly_size)808{809struct brw_sf_compile c;810memset(&c, 0, sizeof(c));811812/* Begin the compilation:813*/814brw_init_codegen(compiler->devinfo, &c.func, mem_ctx);815816c.key = *key;817c.vue_map = *vue_map;818if (c.key.do_point_coord) {819/*820* gl_PointCoord is a FS instead of VS builtin variable, thus it's821* not included in c.vue_map generated in VS stage. Here we add822* it manually to let SF shader generate the needed interpolation823* coefficient for FS shader.824*/825c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;826c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;827}828c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;829c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;830c.nr_setup_regs = c.nr_attr_regs;831832c.prog_data.urb_read_length = c.nr_attr_regs;833c.prog_data.urb_entry_size = c.nr_setup_regs * 2;834835/* Which primitive? Or all three?836*/837switch (key->primitive) {838case BRW_SF_PRIM_TRIANGLES:839c.nr_verts = 3;840brw_emit_tri_setup( &c, true );841break;842case BRW_SF_PRIM_LINES:843c.nr_verts = 2;844brw_emit_line_setup( &c, true );845break;846case BRW_SF_PRIM_POINTS:847c.nr_verts = 1;848if (key->do_point_sprite)849brw_emit_point_sprite_setup( &c, true );850else851brw_emit_point_setup( &c, true );852break;853case BRW_SF_PRIM_UNFILLED_TRIS:854c.nr_verts = 3;855brw_emit_anyprim_setup( &c );856break;857default:858unreachable("not reached");859}860861/* FINISHME: SF programs use calculated jumps (i.e., JMPI with a register862* source). Compacting would be difficult.863*/864/* brw_compact_instructions(&c.func, 0, 0, NULL); */865866*prog_data = c.prog_data;867868const unsigned *program = brw_get_program(&c.func, final_assembly_size);869870if (INTEL_DEBUG & DEBUG_SF) {871fprintf(stderr, "sf:\n");872brw_disassemble_with_labels(compiler->devinfo,873program, 0, *final_assembly_size, stderr);874fprintf(stderr, "\n");875}876877return program;878}879880881