/* Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_exec.h */
/* 4565 views */
/**************************************************************************1*2* Copyright 2007-2008 VMware, Inc.3* All Rights Reserved.4* Copyright 2009-2010 VMware, Inc. All rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining a7* copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sub license, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial portions16* of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS19* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.21* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR22* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,23* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE24* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26**************************************************************************/2728#ifndef TGSI_EXEC_H29#define TGSI_EXEC_H3031#include "pipe/p_compiler.h"32#include "pipe/p_state.h"33#include "pipe/p_shader_tokens.h"3435#if defined __cplusplus36extern "C" {37#endif3839#define TGSI_CHAN_X 040#define TGSI_CHAN_Y 141#define TGSI_CHAN_Z 242#define TGSI_CHAN_W 34344#define TGSI_NUM_CHANNELS 4 /* R,G,B,A */45#define TGSI_QUAD_SIZE 4 /* 4 pixel/quad */4647#define TGSI_FOR_EACH_CHANNEL( CHAN )\48for (CHAN = 0; CHAN < TGSI_NUM_CHANNELS; CHAN++)4950#define TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\51((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))5253#define TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, 
CHAN )\54if (TGSI_IS_DST0_CHANNEL_ENABLED( INST, CHAN ))5556#define TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\57TGSI_FOR_EACH_CHANNEL( CHAN )\58TGSI_IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )5960#define TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\61((INST)->Dst[1].Register.WriteMask & (1 << (CHAN)))6263#define TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )\64if (TGSI_IS_DST1_CHANNEL_ENABLED( INST, CHAN ))6566#define TGSI_FOR_EACH_DST1_ENABLED_CHANNEL( INST, CHAN )\67TGSI_FOR_EACH_CHANNEL( CHAN )\68TGSI_IF_IS_DST1_CHANNEL_ENABLED( INST, CHAN )6970/**71* Registers may be treated as float, signed int or unsigned int.72*/73union tgsi_exec_channel74{75float f[TGSI_QUAD_SIZE];76int i[TGSI_QUAD_SIZE];77unsigned u[TGSI_QUAD_SIZE];78} ALIGN16;7980/**81* A vector[RGBA] of channels[4 pixels]82*/83struct ALIGN16 tgsi_exec_vector84{85union tgsi_exec_channel xyzw[TGSI_NUM_CHANNELS];86};8788/**89* For fragment programs, information for computing fragment input90* values from plane equation of the triangle/line.91*/92struct tgsi_interp_coef93{94float a0[TGSI_NUM_CHANNELS]; /* in an xyzw layout */95float dadx[TGSI_NUM_CHANNELS];96float dady[TGSI_NUM_CHANNELS];97};9899enum tgsi_sampler_control100{101TGSI_SAMPLER_LOD_NONE,102TGSI_SAMPLER_LOD_BIAS,103TGSI_SAMPLER_LOD_EXPLICIT,104TGSI_SAMPLER_LOD_ZERO,105TGSI_SAMPLER_DERIVS_EXPLICIT,106TGSI_SAMPLER_GATHER,107};108109struct tgsi_image_params {110unsigned unit;111unsigned tgsi_tex_instr;112enum pipe_format format;113unsigned execmask;114};115116struct tgsi_image {117/* image interfaces */118void (*load)(const struct tgsi_image *image,119const struct tgsi_image_params *params,120const int s[TGSI_QUAD_SIZE],121const int t[TGSI_QUAD_SIZE],122const int r[TGSI_QUAD_SIZE],123const int sample[TGSI_QUAD_SIZE],124float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);125126void (*store)(const struct tgsi_image *image,127const struct tgsi_image_params *params,128const int s[TGSI_QUAD_SIZE],129const int t[TGSI_QUAD_SIZE],130const int 
r[TGSI_QUAD_SIZE],131const int sample[TGSI_QUAD_SIZE],132float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);133134void (*op)(const struct tgsi_image *image,135const struct tgsi_image_params *params,136enum tgsi_opcode opcode,137const int s[TGSI_QUAD_SIZE],138const int t[TGSI_QUAD_SIZE],139const int r[TGSI_QUAD_SIZE],140const int sample[TGSI_QUAD_SIZE],141float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],142float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);143144void (*get_dims)(const struct tgsi_image *image,145const struct tgsi_image_params *params,146int dims[4]);147};148149struct tgsi_buffer_params {150unsigned unit;151unsigned execmask;152unsigned writemask;153};154155/* SSBO interfaces */156struct tgsi_buffer {157void *(*lookup)(const struct tgsi_buffer *buffer,158uint32_t unit, uint32_t *size);159};160161/**162* Information for sampling textures, which must be implemented163* by code outside the TGSI executor.164*/165struct tgsi_sampler166{167/** Get samples for four fragments in a quad */168/* this interface contains 5 sets of channels that vary169* depending on the sampler.170* s - the first texture coordinate for sampling.171* t - the second texture coordinate for sampling - unused for 1D,172layer for 1D arrays.173* r - the third coordinate for sampling for 3D, cube, cube arrays,174* layer for 2D arrays. 
Compare value for 1D/2D shadows.175* c0 - Compare value for shadow cube and shadow 2d arrays,176* layer for cube arrays.177* derivs - explicit derivatives.178* offset - texel offsets179* lod - lod value, except for shadow cube arrays (compare value there).180*/181void (*get_samples)(struct tgsi_sampler *sampler,182const unsigned sview_index,183const unsigned sampler_index,184const float s[TGSI_QUAD_SIZE],185const float t[TGSI_QUAD_SIZE],186const float r[TGSI_QUAD_SIZE],187const float c0[TGSI_QUAD_SIZE],188const float c1[TGSI_QUAD_SIZE],189float derivs[3][2][TGSI_QUAD_SIZE],190const int8_t offset[3],191enum tgsi_sampler_control control,192float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);193void (*get_dims)(struct tgsi_sampler *sampler,194const unsigned sview_index,195int level, int dims[4]);196void (*get_texel)(struct tgsi_sampler *sampler,197const unsigned sview_index,198const int i[TGSI_QUAD_SIZE],199const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],200const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],201float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);202void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,203const unsigned sview_index,204const unsigned sampler_index,205const float s[TGSI_QUAD_SIZE],206const float t[TGSI_QUAD_SIZE],207const float p[TGSI_QUAD_SIZE],208const float c0[TGSI_QUAD_SIZE],209const enum tgsi_sampler_control control,210float mipmap[TGSI_QUAD_SIZE],211float lod[TGSI_QUAD_SIZE]);212};213214#define TGSI_EXEC_NUM_TEMPS 4096215216#define TGSI_EXEC_MAX_NESTING 32217#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING218#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING219#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING220#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING221222/* The maximum number of input attributes per vertex. 
For 2D223* input register files, this is the stride between two 1D224* arrays.225*/226#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32227228/* The maximum number of bytes per constant buffer.229*/230#define TGSI_EXEC_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))231232/* The maximum number of vertices per primitive */233#define TGSI_MAX_PRIM_VERTICES 6234235/* The maximum number of primitives to be generated */236#define TGSI_MAX_PRIMITIVES 64237238/* The maximum total number of vertices */239#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)240241#define TGSI_MAX_MISC_INPUTS 8242243#define TGSI_MAX_VERTEX_STREAMS 4244245/** function call/activation record */246struct tgsi_call_record247{248uint CondStackTop;249uint LoopStackTop;250uint ContStackTop;251int SwitchStackTop;252int BreakStackTop;253uint ReturnAddr;254};255256257/* Switch-case block state. */258struct tgsi_switch_record {259uint mask; /**< execution mask */260union tgsi_exec_channel selector; /**< a value case statements are compared to */261uint defaultMask; /**< non-execute mask for default case */262};263264265enum tgsi_break_type {266TGSI_EXEC_BREAK_INSIDE_LOOP,267TGSI_EXEC_BREAK_INSIDE_SWITCH268};269270271#define TGSI_EXEC_MAX_BREAK_STACK (TGSI_EXEC_MAX_LOOP_NESTING + TGSI_EXEC_MAX_SWITCH_NESTING)272273typedef float float4[4];274275struct tgsi_exec_machine;276277typedef void (* apply_sample_offset_func)(278const struct tgsi_exec_machine *mach,279unsigned attrib,280unsigned chan,281float ofs_x,282float ofs_y,283union tgsi_exec_channel *out_chan);284285/**286* Run-time virtual machine state for executing TGSI shader.287*/288struct ALIGN16 tgsi_exec_machine289{290/* Total = program temporaries + internal temporaries291*/292struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS];293294unsigned ImmsReserved;295float4 *Imms;296297struct tgsi_exec_vector *Inputs;298struct tgsi_exec_vector *Outputs;299apply_sample_offset_func *InputSampleOffsetApply;300301/* System values 
*/302unsigned SysSemanticToIndex[TGSI_SEMANTIC_COUNT];303struct tgsi_exec_vector SystemValue[TGSI_MAX_MISC_INPUTS];304305struct tgsi_exec_vector Addrs[3];306307struct tgsi_sampler *Sampler;308309struct tgsi_image *Image;310struct tgsi_buffer *Buffer;311unsigned ImmLimit;312313const void *Consts[PIPE_MAX_CONSTANT_BUFFERS];314unsigned ConstsSize[PIPE_MAX_CONSTANT_BUFFERS];315316const struct tgsi_token *Tokens; /**< Declarations, instructions */317enum pipe_shader_type ShaderType; /**< PIPE_SHADER_x */318319/* GEOMETRY processor only. */320/* Number of vertices emitted per emitted primitive. */321unsigned *Primitives[TGSI_MAX_VERTEX_STREAMS];322/* Offsets in ->Outputs of the primitives' vertex output data */323unsigned *PrimitiveOffsets[TGSI_MAX_VERTEX_STREAMS];324unsigned NumOutputs;325unsigned MaxOutputVertices;326/* Offset in ->Outputs for the current vertex to be emitted. */327unsigned OutputVertexOffset;328/* Number of primitives emitted. */329unsigned OutputPrimCount[TGSI_MAX_VERTEX_STREAMS];330331/* FRAGMENT processor only. */332const struct tgsi_interp_coef *InterpCoefs;333struct tgsi_exec_vector QuadPos;334float Face; /**< +1 if front facing, -1 if back facing */335bool flatshade_color;336337/* Compute Only */338void *LocalMem;339unsigned LocalMemSize;340341/* See GLSL 4.50 specification for definition of helper invocations */342uint NonHelperMask; /**< non-helpers */343/* Conditional execution masks */344uint CondMask; /**< For IF/ELSE/ENDIF */345uint LoopMask; /**< For BGNLOOP/ENDLOOP */346uint ContMask; /**< For loop CONT statements */347uint FuncMask; /**< For function calls */348uint ExecMask; /**< = CondMask & LoopMask */349uint KillMask; /**< Mask of channels killed in the current shader execution */350351/* Current switch-case state. */352struct tgsi_switch_record Switch;353354/* Current break type. 
*/355enum tgsi_break_type BreakType;356357/** Condition mask stack (for nested conditionals) */358uint CondStack[TGSI_EXEC_MAX_COND_NESTING];359int CondStackTop;360361/** Loop mask stack (for nested loops) */362uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];363int LoopStackTop;364365/** Loop label stack */366uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];367int LoopLabelStackTop;368369/** Loop continue mask stack (see comments in tgsi_exec.c) */370uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];371int ContStackTop;372373/** Switch case stack */374struct tgsi_switch_record SwitchStack[TGSI_EXEC_MAX_SWITCH_NESTING];375int SwitchStackTop;376377enum tgsi_break_type BreakStack[TGSI_EXEC_MAX_BREAK_STACK];378int BreakStackTop;379380/** Function execution mask stack (for executing subroutine code) */381uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];382int FuncStackTop;383384/** Function call stack for saving/restoring the program counter */385struct tgsi_call_record CallStack[TGSI_EXEC_MAX_CALL_NESTING];386int CallStackTop;387388struct tgsi_full_instruction *Instructions;389uint NumInstructions;390391struct tgsi_full_declaration *Declarations;392uint NumDeclarations;393394struct tgsi_declaration_sampler_view395SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS];396397boolean UsedGeometryShader;398399int pc;400};401402struct tgsi_exec_machine *403tgsi_exec_machine_create(enum pipe_shader_type shader_type);404405void406tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);407408409void410tgsi_exec_machine_bind_shader(411struct tgsi_exec_machine *mach,412const struct tgsi_token *tokens,413struct tgsi_sampler *sampler,414struct tgsi_image *image,415struct tgsi_buffer *buffer);416417uint418tgsi_exec_machine_run(419struct tgsi_exec_machine *mach, int start_pc );420421422void423tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);424425426extern void427tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,428unsigned num_bufs,429const void **bufs,430const unsigned 
*buf_sizes);431432433static inline int434tgsi_exec_get_shader_param(enum pipe_shader_cap param)435{436switch(param) {437case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:438case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:439case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:440case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:441return INT_MAX;442case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:443return TGSI_EXEC_MAX_NESTING;444case PIPE_SHADER_CAP_MAX_INPUTS:445return TGSI_EXEC_MAX_INPUT_ATTRIBS;446case PIPE_SHADER_CAP_MAX_OUTPUTS:447return 32;448case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:449return TGSI_EXEC_MAX_CONST_BUFFER_SIZE;450case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:451return PIPE_MAX_CONSTANT_BUFFERS;452case PIPE_SHADER_CAP_MAX_TEMPS:453return TGSI_EXEC_NUM_TEMPS;454case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:455return 1;456case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:457case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:458case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:459case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:460return 1;461case PIPE_SHADER_CAP_SUBROUTINES:462return 1;463case PIPE_SHADER_CAP_INTEGERS:464return 1;465case PIPE_SHADER_CAP_INT64_ATOMICS:466case PIPE_SHADER_CAP_FP16:467case PIPE_SHADER_CAP_FP16_DERIVATIVES:468case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:469case PIPE_SHADER_CAP_INT16:470case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:471return 0;472case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:473return PIPE_MAX_SAMPLERS;474case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:475return PIPE_MAX_SHADER_SAMPLER_VIEWS;476case PIPE_SHADER_CAP_PREFERRED_IR:477return PIPE_SHADER_IR_TGSI;478case PIPE_SHADER_CAP_SUPPORTED_IRS:479return 1 << PIPE_SHADER_IR_TGSI;480case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:481return 1;482case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:483case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:484case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:485return 1;486case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:487case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:488case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:489case 
PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:490case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:491case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:492return 0;493case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:494return PIPE_MAX_SHADER_BUFFERS;495case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:496return PIPE_MAX_SHADER_IMAGES;497498case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:499return 32;500}501/* if we get here, we missed a shader cap above (and should have seen502* a compiler warning.)503*/504return 0;505}506507#if defined __cplusplus508} /* extern "C" */509#endif510511#endif /* TGSI_EXEC_H */512513514