Path: blob/21.2-virgl/src/intel/compiler/brw_fs.h
4550 views
/*1* Copyright © 2010 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22* Authors:23* Eric Anholt <[email protected]>24*25*/2627#ifndef BRW_FS_H28#define BRW_FS_H2930#include "brw_shader.h"31#include "brw_ir_fs.h"32#include "brw_fs_builder.h"33#include "brw_fs_live_variables.h"34#include "brw_ir_performance.h"35#include "compiler/nir/nir.h"3637struct bblock_t;38namespace {39struct acp_entry;40}4142class fs_visitor;4344namespace brw {45/**46* Register pressure analysis of a shader. Estimates how many registers47* are live at any point of the program in GRF units.48*/49struct register_pressure {50register_pressure(const fs_visitor *v);51~register_pressure();5253analysis_dependency_class54dependency_class() const55{56return (DEPENDENCY_INSTRUCTION_IDENTITY |57DEPENDENCY_INSTRUCTION_DATA_FLOW |58DEPENDENCY_VARIABLES);59}6061bool62validate(const fs_visitor *) const63{64/* FINISHME */65return true;66}6768unsigned *regs_live_at_ip;69};70}7172struct brw_gs_compile;7374static inline fs_reg75offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta)76{77return offset(reg, bld.dispatch_width(), delta);78}7980struct shader_stats {81const char *scheduler_mode;82unsigned promoted_constants;83};8485/**86* The fragment shader front-end.87*88* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.89*/90class fs_visitor : public backend_shader91{92public:93fs_visitor(const struct brw_compiler *compiler, void *log_data,94void *mem_ctx,95const brw_base_prog_key *key,96struct brw_stage_prog_data *prog_data,97const nir_shader *shader,98unsigned dispatch_width,99int shader_time_index,100bool debug_enabled);101fs_visitor(const struct brw_compiler *compiler, void *log_data,102void *mem_ctx,103struct brw_gs_compile *gs_compile,104struct brw_gs_prog_data *prog_data,105const nir_shader *shader,106int shader_time_index,107bool debug_enabled);108void init();109~fs_visitor();110111fs_reg vgrf(const glsl_type *const type);112void import_uniforms(fs_visitor *v);113114void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,115const fs_reg &dst,116const fs_reg &surf_index,117const fs_reg &varying_offset,118uint32_t const_offset,119uint8_t alignment);120void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);121122bool run_fs(bool allow_spilling, bool do_rep_send);123bool run_vs();124bool run_tcs();125bool run_tes();126bool run_gs();127bool run_cs(bool allow_spilling);128bool run_bs(bool allow_spilling);129void optimize();130void allocate_registers(bool allow_spilling);131void setup_fs_payload_gfx4();132void setup_fs_payload_gfx6();133void setup_vs_payload();134void setup_gs_payload();135void setup_cs_payload();136bool fixup_sends_duplicate_payload();137void fixup_3src_null_dest();138bool fixup_nomask_control_flow();139void assign_curb_setup();140void assign_urb_setup();141void convert_attr_sources_to_hw_regs(fs_inst *inst);142void assign_vs_urb_setup();143void assign_tcs_urb_setup();144void assign_tes_urb_setup();145void assign_gs_urb_setup();146bool assign_regs(bool allow_spilling, bool spill_all);147void assign_regs_trivial();148void calculate_payload_ranges(int payload_node_count,149int *payload_last_use_ip) const;150void split_virtual_grfs();151bool compact_virtual_grfs();152void assign_constant_locations();153bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,154unsigned *out_pull_index);155void lower_constant_loads();156virtual void invalidate_analysis(brw::analysis_dependency_class c);157void validate();158bool opt_algebraic();159bool opt_redundant_halt();160bool opt_cse();161bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);162163bool opt_copy_propagation();164bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);165bool try_constant_propagate(fs_inst *inst, acp_entry *entry);166bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,167exec_list *acp);168bool opt_drop_redundant_mov_to_flags();169bool opt_register_renaming();170bool opt_bank_conflicts();171bool register_coalesce();172bool compute_to_mrf();173bool eliminate_find_live_channel();174bool dead_code_eliminate();175bool remove_duplicate_mrf_writes();176bool remove_extra_rounding_modes();177178void schedule_instructions(instruction_scheduler_mode mode);179void insert_gfx4_send_dependency_workarounds();180void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,181fs_inst *inst);182void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,183fs_inst *inst);184void vfail(const char *msg, va_list args);185void fail(const char *msg, ...);186void limit_dispatch_width(unsigned n, const char *msg);187void lower_uniform_pull_constant_loads();188bool lower_load_payload();189bool lower_pack();190bool lower_regioning();191bool lower_logical_sends();192bool lower_integer_multiplication();193bool lower_minmax();194bool lower_simd_width();195bool lower_barycentrics();196bool lower_derivatives();197bool lower_scoreboard();198bool lower_sub_sat();199bool opt_combine_constants();200201void emit_dummy_fs();202void emit_repclear_shader();203void emit_fragcoord_interpolation(fs_reg wpos);204fs_reg *emit_frontfacing_interpolation();205fs_reg *emit_samplepos_setup();206fs_reg *emit_sampleid_setup();207fs_reg *emit_samplemaskin_setup();208fs_reg *emit_shading_rate_setup();209void emit_interpolation_setup_gfx4();210void emit_interpolation_setup_gfx6();211void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);212fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,213const fs_reg &texture,214const fs_reg &texture_handle);215void emit_gfx6_gather_wa(uint8_t wa, fs_reg dst);216fs_reg resolve_source_modifiers(const fs_reg &src);217void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,218fs_reg result, fs_reg *op, unsigned fsign_src);219void emit_shader_float_controls_execution_mode();220bool opt_peephole_sel();221bool opt_peephole_predicated_break();222bool opt_saturate_propagation();223bool opt_cmod_propagation();224bool opt_zero_samples();225226void set_tcs_invocation_id();227228void emit_nir_code();229void nir_setup_outputs();230void nir_setup_uniforms();231void nir_emit_system_values();232void nir_emit_impl(nir_function_impl *impl);233void nir_emit_cf_list(exec_list *list);234void nir_emit_if(nir_if *if_stmt);235void nir_emit_loop(nir_loop *loop);236void nir_emit_block(nir_block *block);237void nir_emit_instr(nir_instr *instr);238void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,239bool need_dest);240bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,241nir_alu_instr *instr);242void nir_emit_load_const(const brw::fs_builder &bld,243nir_load_const_instr *instr);244void nir_emit_vs_intrinsic(const brw::fs_builder &bld,245nir_intrinsic_instr *instr);246void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,247nir_intrinsic_instr *instr);248void nir_emit_gs_intrinsic(const brw::fs_builder &bld,249nir_intrinsic_instr *instr);250void nir_emit_fs_intrinsic(const brw::fs_builder &bld,251nir_intrinsic_instr *instr);252void nir_emit_cs_intrinsic(const brw::fs_builder &bld,253nir_intrinsic_instr *instr);254void nir_emit_bs_intrinsic(const brw::fs_builder &bld,255nir_intrinsic_instr *instr);256fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,257nir_intrinsic_instr *instr);258fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,259nir_intrinsic_instr *instr);260fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,261const fs_reg &addr,262bool in_dwords);263void nir_emit_intrinsic(const brw::fs_builder &bld,264nir_intrinsic_instr *instr);265void nir_emit_tes_intrinsic(const brw::fs_builder &bld,266nir_intrinsic_instr *instr);267void nir_emit_ssbo_atomic(const brw::fs_builder &bld,268int op, nir_intrinsic_instr *instr);269void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,270int op, nir_intrinsic_instr *instr);271void nir_emit_shared_atomic(const brw::fs_builder &bld,272int op, nir_intrinsic_instr *instr);273void nir_emit_shared_atomic_float(const brw::fs_builder &bld,274int op, nir_intrinsic_instr *instr);275void nir_emit_global_atomic(const brw::fs_builder &bld,276int op, nir_intrinsic_instr *instr);277void nir_emit_global_atomic_float(const brw::fs_builder &bld,278int op, nir_intrinsic_instr *instr);279void nir_emit_texture(const brw::fs_builder &bld,280nir_tex_instr *instr);281void nir_emit_jump(const brw::fs_builder &bld,282nir_jump_instr *instr);283fs_reg get_nir_src(const nir_src &src);284fs_reg get_nir_src_imm(const nir_src &src);285fs_reg get_nir_dest(const nir_dest &dest);286fs_reg get_indirect_offset(nir_intrinsic_instr *instr);287fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,288nir_intrinsic_instr *instr);289fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,290nir_intrinsic_instr *instr);291struct brw_reg get_tcs_output_urb_handle();292293void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,294unsigned wr_mask);295296bool optimize_extract_to_float(nir_alu_instr *instr,297const fs_reg &result);298bool optimize_frontfacing_ternary(nir_alu_instr *instr,299const fs_reg &result);300301void emit_alpha_test();302fs_inst *emit_single_fb_write(const brw::fs_builder &bld,303fs_reg color1, fs_reg color2,304fs_reg src0_alpha, unsigned components);305void emit_alpha_to_coverage_workaround(const fs_reg &src0_alpha);306void emit_fb_writes();307fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,308const fs_reg &dst, unsigned target);309void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());310void set_gs_stream_control_data_bits(const fs_reg &vertex_count,311unsigned stream_id);312void emit_gs_control_data_bits(const fs_reg &vertex_count);313void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);314void emit_gs_vertex(const nir_src &vertex_count_nir_src,315unsigned stream_id);316void emit_gs_thread_end();317void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,318unsigned base_offset, const nir_src &offset_src,319unsigned num_components, unsigned first_component);320void emit_cs_terminate();321fs_reg *emit_cs_work_group_id_setup();322323void emit_barrier();324325void emit_shader_time_begin();326void emit_shader_time_end();327void SHADER_TIME_ADD(const brw::fs_builder &bld,328int shader_time_subindex,329fs_reg value);330331fs_reg get_timestamp(const brw::fs_builder &bld);332333fs_reg interp_reg(int location, int channel);334335virtual void dump_instructions() const;336virtual void dump_instructions(const char *name) const;337void dump_instruction(const backend_instruction *inst) const;338void dump_instruction(const backend_instruction *inst, FILE *file) const;339340const brw_base_prog_key *const key;341const struct brw_sampler_prog_key_data *key_tex;342343struct brw_gs_compile *gs_compile;344345struct brw_stage_prog_data *prog_data;346347brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;348brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;349brw_analysis<brw::performance, fs_visitor> performance_analysis;350351/** Number of uniform variable components visited. */352unsigned uniforms;353354/** Byte-offset for the next available spot in the scratch space buffer. */355unsigned last_scratch;356357/**358* Array mapping UNIFORM register numbers to the pull parameter index,359* or -1 if this uniform register isn't being uploaded as a pull constant.360*/361int *pull_constant_loc;362363/**364* Array mapping UNIFORM register numbers to the push parameter index,365* or -1 if this uniform register isn't being uploaded as a push constant.366*/367int *push_constant_loc;368369fs_reg subgroup_id;370fs_reg group_size[3];371fs_reg scratch_base;372fs_reg frag_depth;373fs_reg frag_stencil;374fs_reg sample_mask;375fs_reg outputs[VARYING_SLOT_MAX];376fs_reg dual_src_output;377int first_non_payload_grf;378/** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */379unsigned max_grf;380381fs_reg *nir_locals;382fs_reg *nir_ssa_values;383fs_reg *nir_system_values;384385bool failed;386char *fail_msg;387388/** Register numbers for thread payload fields. */389struct thread_payload {390uint8_t subspan_coord_reg[2];391uint8_t source_depth_reg[2];392uint8_t source_w_reg[2];393uint8_t aa_dest_stencil_reg[2];394uint8_t dest_depth_reg[2];395uint8_t sample_pos_reg[2];396uint8_t sample_mask_in_reg[2];397uint8_t depth_w_coef_reg[2];398uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];399uint8_t local_invocation_id_reg[2];400401/** The number of thread payload registers the hardware will supply. */402uint8_t num_regs;403} payload;404405bool source_depth_to_render_target;406bool runtime_check_aads_emit;407408fs_reg pixel_x;409fs_reg pixel_y;410fs_reg pixel_z;411fs_reg wpos_w;412fs_reg pixel_w;413fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];414fs_reg shader_start_time;415fs_reg final_gs_vertex_count;416fs_reg control_data_bits;417fs_reg invocation_id;418419unsigned grf_used;420bool spilled_any_registers;421422const unsigned dispatch_width; /**< 8, 16 or 32 */423unsigned max_dispatch_width;424425int shader_time_index;426427struct shader_stats shader_stats;428429brw::fs_builder bld;430431private:432fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,433nir_alu_instr *instr,434fs_reg *op,435bool need_dest);436437void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,438fs_reg *op);439void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);440void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);441void lower_mulh_inst(fs_inst *inst, bblock_t *block);442443unsigned workgroup_size() const;444};445446/**447* Return the flag register used in fragment shaders to keep track of live448* samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32449* dispatch mode, while earlier generations are constrained to f0.1, which450* limits the dispatch width to SIMD16 for fragment shaders that use discard.451*/452static inline unsigned453sample_mask_flag_subreg(const fs_visitor *shader)454{455assert(shader->stage == MESA_SHADER_FRAGMENT);456return shader->devinfo->ver >= 7 ? 2 : 1;457}458459/**460* The fragment shader code generator.461*462* Translates FS IR to actual i965 assembly code.463*/464class fs_generator465{466public:467fs_generator(const struct brw_compiler *compiler, void *log_data,468void *mem_ctx,469struct brw_stage_prog_data *prog_data,470bool runtime_check_aads_emit,471gl_shader_stage stage);472~fs_generator();473474void enable_debug(const char *shader_name);475int generate_code(const cfg_t *cfg, int dispatch_width,476struct shader_stats shader_stats,477const brw::performance &perf,478struct brw_compile_stats *stats);479void add_const_data(void *data, unsigned size);480void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);481const unsigned *get_assembly();482483private:484void fire_fb_write(fs_inst *inst,485struct brw_reg payload,486struct brw_reg implied_header,487GLuint nr);488void generate_send(fs_inst *inst,489struct brw_reg dst,490struct brw_reg desc,491struct brw_reg ex_desc,492struct brw_reg payload,493struct brw_reg payload2);494void generate_fb_write(fs_inst *inst, struct brw_reg payload);495void generate_fb_read(fs_inst *inst, struct brw_reg dst,496struct brw_reg payload);497void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);498void generate_urb_write(fs_inst *inst, struct brw_reg payload);499void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);500void generate_barrier(fs_inst *inst, struct brw_reg src);501bool generate_linterp(fs_inst *inst, struct brw_reg dst,502struct brw_reg *src);503void generate_tex(fs_inst *inst, struct brw_reg dst,504struct brw_reg surface_index,505struct brw_reg sampler_index);506void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,507struct brw_reg src,508struct brw_reg surf_index);509void generate_ddx(const fs_inst *inst,510struct brw_reg dst, struct brw_reg src);511void generate_ddy(const fs_inst *inst,512struct brw_reg dst, struct brw_reg src);513void generate_scratch_write(fs_inst *inst, struct brw_reg src);514void generate_scratch_read(fs_inst *inst, struct brw_reg dst);515void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);516void generate_scratch_header(fs_inst *inst, struct brw_reg dst);517void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,518struct brw_reg index,519struct brw_reg offset);520void generate_uniform_pull_constant_load_gfx7(fs_inst *inst,521struct brw_reg dst,522struct brw_reg surf_index,523struct brw_reg payload);524void generate_varying_pull_constant_load_gfx4(fs_inst *inst,525struct brw_reg dst,526struct brw_reg index);527void generate_mov_dispatch_to_flags(fs_inst *inst);528529void generate_pixel_interpolator_query(fs_inst *inst,530struct brw_reg dst,531struct brw_reg src,532struct brw_reg msg_data,533unsigned msg_type);534535void generate_set_sample_id(fs_inst *inst,536struct brw_reg dst,537struct brw_reg src0,538struct brw_reg src1);539540void generate_halt(fs_inst *inst);541542void generate_pack_half_2x16_split(fs_inst *inst,543struct brw_reg dst,544struct brw_reg x,545struct brw_reg y);546547void generate_shader_time_add(fs_inst *inst,548struct brw_reg payload,549struct brw_reg offset,550struct brw_reg value);551552void generate_mov_indirect(fs_inst *inst,553struct brw_reg dst,554struct brw_reg reg,555struct brw_reg indirect_byte_offset);556557void generate_shuffle(fs_inst *inst,558struct brw_reg dst,559struct brw_reg src,560struct brw_reg idx);561562void generate_quad_swizzle(const fs_inst *inst,563struct brw_reg dst, struct brw_reg src,564unsigned swiz);565566bool patch_halt_jumps();567568const struct brw_compiler *compiler;569void *log_data; /* Passed to compiler->*_log functions */570571const struct intel_device_info *devinfo;572573struct brw_codegen *p;574struct brw_stage_prog_data * const prog_data;575576unsigned dispatch_width; /**< 8, 16 or 32 */577578exec_list discard_halt_patches;579bool runtime_check_aads_emit;580bool debug_flag;581const char *shader_name;582gl_shader_stage stage;583void *mem_ctx;584};585586namespace brw {587inline fs_reg588fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],589brw_reg_type type = BRW_REGISTER_TYPE_F)590{591if (!regs[0])592return fs_reg();593594if (bld.dispatch_width() > 16) {595const fs_reg tmp = bld.vgrf(type);596const brw::fs_builder hbld = bld.exec_all().group(16, 0);597const unsigned m = bld.dispatch_width() / hbld.dispatch_width();598fs_reg components[2];599assert(m <= 2);600601for (unsigned g = 0; g < m; g++)602components[g] = retype(brw_vec8_grf(regs[g], 0), type);603604hbld.LOAD_PAYLOAD(tmp, components, m, 0);605606return tmp;607608} else {609return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));610}611}612613inline fs_reg614fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])615{616if (!regs[0])617return fs_reg();618619const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);620const brw::fs_builder hbld = bld.exec_all().group(8, 0);621const unsigned m = bld.dispatch_width() / hbld.dispatch_width();622fs_reg *const components = new fs_reg[2 * m];623624for (unsigned c = 0; c < 2; c++) {625for (unsigned g = 0; g < m; g++)626components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),627hbld, c + 2 * (g % 2));628}629630hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);631632delete[] components;633return tmp;634}635636bool637lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);638}639640void shuffle_from_32bit_read(const brw::fs_builder &bld,641const fs_reg &dst,642const fs_reg &src,643uint32_t first_component,644uint32_t components);645646fs_reg setup_imm_df(const brw::fs_builder &bld,647double v);648649fs_reg setup_imm_b(const brw::fs_builder &bld,650int8_t v);651652fs_reg setup_imm_ub(const brw::fs_builder &bld,653uint8_t v);654655enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,656nir_intrinsic_op op);657658uint32_t brw_fb_write_msg_control(const fs_inst *inst,659const struct brw_wm_prog_data *prog_data);660661void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);662663#endif /* BRW_FS_H */664665666