/* Path: blob/21.2-virgl/src/gallium/auxiliary/draw/draw_llvm.c
 * 4565 views
 */
/**************************************************************************1*2* Copyright 2010 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "draw_llvm.h"2829#include "draw_context.h"30#include "draw_vs.h"31#include "draw_gs.h"3233#include "gallivm/lp_bld_arit.h"34#include "gallivm/lp_bld_arit_overflow.h"35#include "gallivm/lp_bld_bitarit.h"36#include "gallivm/lp_bld_gather.h"37#include "gallivm/lp_bld_logic.h"38#include "gallivm/lp_bld_const.h"39#include "gallivm/lp_bld_coro.h"40#include "gallivm/lp_bld_swizzle.h"41#include "gallivm/lp_bld_struct.h"42#include "gallivm/lp_bld_type.h"43#include "gallivm/lp_bld_flow.h"44#include "gallivm/lp_bld_debug.h"45#include "gallivm/lp_bld_tgsi.h"46#include "gallivm/lp_bld_nir.h"47#include "gallivm/lp_bld_printf.h"48#include "gallivm/lp_bld_intr.h"49#include 
"gallivm/lp_bld_init.h"50#include "gallivm/lp_bld_type.h"51#include "gallivm/lp_bld_pack.h"52#include "gallivm/lp_bld_format.h"53#include "gallivm/lp_bld_misc.h"54#include "tgsi/tgsi_exec.h"55#include "tgsi/tgsi_dump.h"5657#include "util/u_math.h"58#include "util/u_pointer.h"59#include "util/u_string.h"60#include "util/simple_list.h"61#include "nir_serialize.h"62#include "util/mesa-sha1.h"63#define DEBUG_STORE 0646566static void67draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);686970struct draw_gs_llvm_iface {71struct lp_build_gs_iface base;7273struct draw_gs_llvm_variant *variant;74LLVMValueRef input;75};7677static inline const struct draw_gs_llvm_iface *78draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)79{80return (const struct draw_gs_llvm_iface *)iface;81}8283struct draw_tcs_llvm_iface {84struct lp_build_tcs_iface base;8586struct draw_tcs_llvm_variant *variant;87LLVMValueRef input;88LLVMValueRef output;89};9091static inline const struct draw_tcs_llvm_iface *92draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)93{94return (const struct draw_tcs_llvm_iface *)iface;95}9697struct draw_tes_llvm_iface {98struct lp_build_tes_iface base;99100struct draw_tes_llvm_variant *variant;101LLVMValueRef input;102};103104static inline const struct draw_tes_llvm_iface *105draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)106{107return (const struct draw_tes_llvm_iface *)iface;108}109110/**111* Create LLVM type for draw_vertex_buffer.112*/113static LLVMTypeRef114create_jit_dvbuffer_type(struct gallivm_state *gallivm,115const char *struct_name)116{117LLVMTargetDataRef target = gallivm->target;118LLVMTypeRef dvbuffer_type;119LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];120LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);121122elem_types[DRAW_JIT_DVBUFFER_MAP] =123LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);124elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;125126dvbuffer_type = 
LLVMStructTypeInContext(gallivm->context, elem_types,127ARRAY_SIZE(elem_types), 0);128129(void) target; /* silence unused var warning for non-debug build */130LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,131target, dvbuffer_type,132DRAW_JIT_DVBUFFER_MAP);133LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,134target, dvbuffer_type,135DRAW_JIT_DVBUFFER_SIZE);136137return dvbuffer_type;138}139140/**141* Create LLVM type for struct draw_jit_texture142*/143static LLVMTypeRef144create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)145{146LLVMTargetDataRef target = gallivm->target;147LLVMTypeRef texture_type;148LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];149LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);150151elem_types[DRAW_JIT_TEXTURE_WIDTH] =152elem_types[DRAW_JIT_TEXTURE_HEIGHT] =153elem_types[DRAW_JIT_TEXTURE_DEPTH] =154elem_types[DRAW_JIT_TEXTURE_NUM_SAMPLES] =155elem_types[DRAW_JIT_TEXTURE_SAMPLE_STRIDE] =156elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =157elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;158elem_types[DRAW_JIT_TEXTURE_BASE] =159LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);160elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =161elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =162elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =163LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);164165texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,166ARRAY_SIZE(elem_types), 0);167168(void) target; /* silence unused var warning for non-debug build */169LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,170target, texture_type,171DRAW_JIT_TEXTURE_WIDTH);172LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,173target, texture_type,174DRAW_JIT_TEXTURE_HEIGHT);175LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,176target, texture_type,177DRAW_JIT_TEXTURE_DEPTH);178LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,179target, 
texture_type,180DRAW_JIT_TEXTURE_BASE);181LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,182target, texture_type,183DRAW_JIT_TEXTURE_ROW_STRIDE);184LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,185target, texture_type,186DRAW_JIT_TEXTURE_IMG_STRIDE);187LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,188target, texture_type,189DRAW_JIT_TEXTURE_FIRST_LEVEL);190LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,191target, texture_type,192DRAW_JIT_TEXTURE_LAST_LEVEL);193LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,194target, texture_type,195DRAW_JIT_TEXTURE_MIP_OFFSETS);196LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, num_samples,197target, texture_type,198DRAW_JIT_TEXTURE_NUM_SAMPLES);199LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, sample_stride,200target, texture_type,201DRAW_JIT_TEXTURE_SAMPLE_STRIDE);202203LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);204205return texture_type;206}207208209/**210* Create LLVM type for struct draw_jit_sampler211*/212static LLVMTypeRef213create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)214{215LLVMTargetDataRef target = gallivm->target;216LLVMTypeRef sampler_type;217LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];218219elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =220elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =221elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);222elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =223LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);224225sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,226ARRAY_SIZE(elem_types), 0);227228(void) target; /* silence unused var warning for non-debug build */229LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,230target, sampler_type,231DRAW_JIT_SAMPLER_MIN_LOD);232LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,233target, sampler_type,234DRAW_JIT_SAMPLER_MAX_LOD);235LP_CHECK_MEMBER_OFFSET(struct 
draw_jit_sampler, lod_bias,236target, sampler_type,237DRAW_JIT_SAMPLER_LOD_BIAS);238LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,239target, sampler_type,240DRAW_JIT_SAMPLER_BORDER_COLOR);241242LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);243244return sampler_type;245}246247/**248* Create LLVM type for struct draw_jit_texture249*/250static LLVMTypeRef251create_jit_image_type(struct gallivm_state *gallivm, const char *struct_name)252{253LLVMTargetDataRef target = gallivm->target;254LLVMTypeRef image_type;255LLVMTypeRef elem_types[DRAW_JIT_IMAGE_NUM_FIELDS];256LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);257258elem_types[DRAW_JIT_IMAGE_WIDTH] =259elem_types[DRAW_JIT_IMAGE_HEIGHT] =260elem_types[DRAW_JIT_IMAGE_DEPTH] =261elem_types[DRAW_JIT_IMAGE_ROW_STRIDE] =262elem_types[DRAW_JIT_IMAGE_IMG_STRIDE] =263elem_types[DRAW_JIT_IMAGE_NUM_SAMPLES] =264elem_types[DRAW_JIT_IMAGE_SAMPLE_STRIDE] = int32_type;265elem_types[DRAW_JIT_IMAGE_BASE] =266LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);267268image_type = LLVMStructTypeInContext(gallivm->context, elem_types,269ARRAY_SIZE(elem_types), 0);270271(void) target; /* silence unused var warning for non-debug build */272LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, width,273target, image_type,274DRAW_JIT_IMAGE_WIDTH);275LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, height,276target, image_type,277DRAW_JIT_IMAGE_HEIGHT);278LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, depth,279target, image_type,280DRAW_JIT_IMAGE_DEPTH);281LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, base,282target, image_type,283DRAW_JIT_IMAGE_BASE);284LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, row_stride,285target, image_type,286DRAW_JIT_IMAGE_ROW_STRIDE);287LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, img_stride,288target, image_type,289DRAW_JIT_IMAGE_IMG_STRIDE);290LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, num_samples,291target, 
image_type,292DRAW_JIT_IMAGE_NUM_SAMPLES);293LP_CHECK_MEMBER_OFFSET(struct draw_jit_image, sample_stride,294target, image_type,295DRAW_JIT_IMAGE_SAMPLE_STRIDE);296297LP_CHECK_STRUCT_SIZE(struct draw_jit_image, target, image_type);298299return image_type;300}301302/**303* Create LLVM type for struct draw_jit_context304*/305static LLVMTypeRef306create_jit_context_type(struct gallivm_state *gallivm,307LLVMTypeRef texture_type, LLVMTypeRef sampler_type,308LLVMTypeRef image_type,309const char *struct_name)310{311LLVMTargetDataRef target = gallivm->target;312LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);313LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);314LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];315LLVMTypeRef context_type;316317elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */318LP_MAX_TGSI_CONST_BUFFERS);319elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */320LP_MAX_TGSI_CONST_BUFFERS);321elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),322DRAW_TOTAL_CLIP_PLANES), 0);323elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */324elem_types[4] = LLVMArrayType(texture_type,325PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */326elem_types[5] = LLVMArrayType(sampler_type,327PIPE_MAX_SAMPLERS); /* samplers */328elem_types[6] = LLVMArrayType(image_type,329PIPE_MAX_SHADER_IMAGES); /* images */330elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* vs_ssbo */331LP_MAX_TGSI_SHADER_BUFFERS);332elem_types[8] = LLVMArrayType(int_type, /* num_vs_ssbos */333LP_MAX_TGSI_SHADER_BUFFERS);334context_type = LLVMStructTypeInContext(gallivm->context, elem_types,335ARRAY_SIZE(elem_types), 0);336337(void) target; /* silence unused var warning for non-debug build */338LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,339target, context_type, DRAW_JIT_CTX_CONSTANTS);340LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,341target, context_type, 
DRAW_JIT_CTX_NUM_CONSTANTS);342LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,343target, context_type, DRAW_JIT_CTX_PLANES);344LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,345target, context_type, DRAW_JIT_CTX_VIEWPORT);346LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,347target, context_type,348DRAW_JIT_CTX_TEXTURES);349LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,350target, context_type,351DRAW_JIT_CTX_SAMPLERS);352LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, images,353target, context_type, DRAW_JIT_CTX_IMAGES);354LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_ssbos,355target, context_type, DRAW_JIT_CTX_SSBOS);356LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_ssbos,357target, context_type, DRAW_JIT_CTX_NUM_SSBOS);358LP_CHECK_STRUCT_SIZE(struct draw_jit_context,359target, context_type);360361return context_type;362}363364365/**366* Create LLVM type for struct draw_gs_jit_context367*/368static LLVMTypeRef369create_gs_jit_context_type(struct gallivm_state *gallivm,370unsigned vector_length,371LLVMTypeRef texture_type, LLVMTypeRef sampler_type,372LLVMTypeRef image_type,373const char *struct_name)374{375LLVMTargetDataRef target = gallivm->target;376LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);377LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);378LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];379LLVMTypeRef context_type;380381elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */382LP_MAX_TGSI_CONST_BUFFERS);383elem_types[1] = LLVMArrayType(int_type, /* num_constants */384LP_MAX_TGSI_CONST_BUFFERS);385elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),386DRAW_TOTAL_CLIP_PLANES), 0);387elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */388389elem_types[4] = LLVMArrayType(texture_type,390PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */391elem_types[5] = LLVMArrayType(sampler_type,392PIPE_MAX_SAMPLERS); /* samplers 
*/393elem_types[6] = LLVMArrayType(image_type,394PIPE_MAX_SHADER_IMAGES); /* images */395elem_types[7] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);396elem_types[8] = LLVMPointerType(LLVMVectorType(int_type,397vector_length), 0);398elem_types[9] = LLVMPointerType(LLVMVectorType(int_type,399vector_length), 0);400401elem_types[10] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */402LP_MAX_TGSI_SHADER_BUFFERS);403elem_types[11] = LLVMArrayType(int_type, /* num_ssbos */404LP_MAX_TGSI_SHADER_BUFFERS);405406context_type = LLVMStructTypeInContext(gallivm->context, elem_types,407ARRAY_SIZE(elem_types), 0);408409(void) target; /* silence unused var warning for non-debug build */410LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,411target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);412LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,413target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS);414LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,415target, context_type, DRAW_GS_JIT_CTX_PLANES);416LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,417target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);418LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,419target, context_type,420DRAW_GS_JIT_CTX_TEXTURES);421LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,422target, context_type,423DRAW_GS_JIT_CTX_SAMPLERS);424LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,425target, context_type,426DRAW_GS_JIT_CTX_PRIM_LENGTHS);427LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,428target, context_type,429DRAW_GS_JIT_CTX_EMITTED_VERTICES);430LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,431target, context_type,432DRAW_GS_JIT_CTX_EMITTED_PRIMS);433LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, ssbos,434target, context_type, DRAW_GS_JIT_CTX_SSBOS);435LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_ssbos,436target, context_type, 
DRAW_GS_JIT_CTX_NUM_SSBOS);437LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, images,438target, context_type, DRAW_GS_JIT_CTX_IMAGES);439LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,440target, context_type);441442return context_type;443}444445446static LLVMTypeRef447create_gs_jit_input_type(struct gallivm_state *gallivm)448{449LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);450LLVMTypeRef input_array;451452input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */453input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */454input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */455input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */456457return input_array;458}459460/**461* Create LLVM type for struct pipe_vertex_buffer462*/463static LLVMTypeRef464create_jit_vertex_buffer_type(struct gallivm_state *gallivm,465const char *struct_name)466{467LLVMTargetDataRef target = gallivm->target;468LLVMTypeRef elem_types[4];469LLVMTypeRef vb_type;470471elem_types[0] = LLVMInt16TypeInContext(gallivm->context);472elem_types[1] = LLVMInt8TypeInContext(gallivm->context);473elem_types[2] = LLVMInt32TypeInContext(gallivm->context);474elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);475476vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,477ARRAY_SIZE(elem_types), 0);478479(void) target; /* silence unused var warning for non-debug build */480LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,481target, vb_type, 0);482LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,483target, vb_type, 1);484LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,485target, vb_type, 2);486LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,487target, vb_type, 3);488489LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);490491return vb_type;492}493494495/**496* Create LLVM type for 
struct vertex_header;497*/498static LLVMTypeRef499create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)500{501LLVMTargetDataRef target = gallivm->target;502LLVMTypeRef elem_types[3];503LLVMTypeRef vertex_header;504char struct_name[24];505506snprintf(struct_name, 23, "vertex_header%d", data_elems);507508elem_types[DRAW_JIT_VERTEX_VERTEX_ID] = LLVMIntTypeInContext(gallivm->context, 32);509elem_types[DRAW_JIT_VERTEX_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);510elem_types[DRAW_JIT_VERTEX_DATA] = LLVMArrayType(elem_types[1], data_elems);511512vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,513ARRAY_SIZE(elem_types), 0);514515/* these are bit-fields and we can't take address of them516LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,517target, vertex_header,518DRAW_JIT_VERTEX_CLIPMASK);519LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,520target, vertex_header,521DRAW_JIT_VERTEX_EDGEFLAG);522LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,523target, vertex_header,524DRAW_JIT_VERTEX_PAD);525LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,526target, vertex_header,527DRAW_JIT_VERTEX_VERTEX_ID);528*/529(void) target; /* silence unused var warning for non-debug build */530LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,531target, vertex_header,532DRAW_JIT_VERTEX_CLIP_POS);533LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,534target, vertex_header,535DRAW_JIT_VERTEX_DATA);536537assert(LLVMABISizeOfType(target, vertex_header) ==538offsetof(struct vertex_header, data[data_elems]));539540return vertex_header;541}542543/**544* Create LLVM type for struct draw_tcs_jit_context545*/546static LLVMTypeRef547create_tcs_jit_context_type(struct gallivm_state *gallivm,548unsigned vector_length,549LLVMTypeRef texture_type, LLVMTypeRef sampler_type,550LLVMTypeRef image_type,551const char *struct_name)552{553LLVMTargetDataRef target = gallivm->target;554LLVMTypeRef float_type = 
LLVMFloatTypeInContext(gallivm->context);555LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);556LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];557LLVMTypeRef context_type;558559elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */560LP_MAX_TGSI_CONST_BUFFERS);561elem_types[1] = LLVMArrayType(int_type, /* num_constants */562LP_MAX_TGSI_CONST_BUFFERS);563elem_types[2] = LLVMInt32TypeInContext(gallivm->context);564elem_types[3] = LLVMInt32TypeInContext(gallivm->context);565566elem_types[4] = LLVMArrayType(texture_type,567PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */568elem_types[5] = LLVMArrayType(sampler_type,569PIPE_MAX_SAMPLERS); /* samplers */570elem_types[6] = LLVMArrayType(image_type,571PIPE_MAX_SHADER_IMAGES); /* images */572573elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */574LP_MAX_TGSI_SHADER_BUFFERS);575elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */576LP_MAX_TGSI_SHADER_BUFFERS);577578context_type = LLVMStructTypeInContext(gallivm->context, elem_types,579ARRAY_SIZE(elem_types), 0);580581(void) target; /* silence unused var warning for non-debug build */582LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, constants,583target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);584LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_constants,585target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);586LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, textures,587target, context_type,588DRAW_TCS_JIT_CTX_TEXTURES);589LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, samplers,590target, context_type,591DRAW_TCS_JIT_CTX_SAMPLERS);592LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, ssbos,593target, context_type, DRAW_TCS_JIT_CTX_SSBOS);594LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, num_ssbos,595target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);596LP_CHECK_MEMBER_OFFSET(struct draw_tcs_jit_context, images,597target, context_type, 
DRAW_TCS_JIT_CTX_IMAGES);598LP_CHECK_STRUCT_SIZE(struct draw_tcs_jit_context,599target, context_type);600601return context_type;602}603604static LLVMTypeRef605create_tcs_jit_input_type(struct gallivm_state *gallivm)606{607LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);608LLVMTypeRef input_array;609610input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */611input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */612input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */613614return input_array;615}616617static LLVMTypeRef618create_tcs_jit_output_type(struct gallivm_state *gallivm)619{620LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);621LLVMTypeRef output_array;622623output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */624output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */625output_array = LLVMPointerType(output_array, 0); /* num vertices per prim */626627return output_array;628}629630static LLVMTypeRef631create_tes_jit_input_type(struct gallivm_state *gallivm)632{633LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);634LLVMTypeRef input_array;635636input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */637input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */638input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */639640return input_array;641}642643/**644* Create LLVM type for struct draw_tes_jit_context645*/646static LLVMTypeRef647create_tes_jit_context_type(struct gallivm_state *gallivm,648unsigned vector_length,649LLVMTypeRef texture_type, LLVMTypeRef sampler_type,650LLVMTypeRef image_type,651const char *struct_name)652{653LLVMTargetDataRef target = gallivm->target;654LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);655LLVMTypeRef int_type = 
LLVMInt32TypeInContext(gallivm->context);656LLVMTypeRef elem_types[DRAW_TCS_JIT_CTX_NUM_FIELDS];657LLVMTypeRef context_type;658659elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */660LP_MAX_TGSI_CONST_BUFFERS);661elem_types[1] = LLVMArrayType(int_type, /* num_constants */662LP_MAX_TGSI_CONST_BUFFERS);663elem_types[2] = LLVMInt32TypeInContext(gallivm->context);664elem_types[3] = LLVMInt32TypeInContext(gallivm->context);665666elem_types[4] = LLVMArrayType(texture_type,667PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */668elem_types[5] = LLVMArrayType(sampler_type,669PIPE_MAX_SAMPLERS); /* samplers */670elem_types[6] = LLVMArrayType(image_type,671PIPE_MAX_SHADER_IMAGES); /* images */672673elem_types[7] = LLVMArrayType(LLVMPointerType(int_type, 0), /* ssbos */674LP_MAX_TGSI_SHADER_BUFFERS);675elem_types[8] = LLVMArrayType(int_type, /* num_ssbos */676LP_MAX_TGSI_SHADER_BUFFERS);677678context_type = LLVMStructTypeInContext(gallivm->context, elem_types,679ARRAY_SIZE(elem_types), 0);680681(void) target; /* silence unused var warning for non-debug build */682LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, constants,683target, context_type, DRAW_TCS_JIT_CTX_CONSTANTS);684LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_constants,685target, context_type, DRAW_TCS_JIT_CTX_NUM_CONSTANTS);686LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, textures,687target, context_type,688DRAW_TCS_JIT_CTX_TEXTURES);689LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, samplers,690target, context_type,691DRAW_TCS_JIT_CTX_SAMPLERS);692LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, ssbos,693target, context_type, DRAW_TCS_JIT_CTX_SSBOS);694LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, num_ssbos,695target, context_type, DRAW_TCS_JIT_CTX_NUM_SSBOS);696LP_CHECK_MEMBER_OFFSET(struct draw_tes_jit_context, images,697target, context_type, DRAW_TCS_JIT_CTX_IMAGES);698LP_CHECK_STRUCT_SIZE(struct draw_tes_jit_context,699target, context_type);700701return 
context_type;702}703704/**705* Create LLVM types for various structures.706*/707static void708create_jit_types(struct draw_llvm_variant *variant)709{710struct gallivm_state *gallivm = variant->gallivm;711LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,712vb_type, image_type;713714texture_type = create_jit_texture_type(gallivm, "texture");715sampler_type = create_jit_sampler_type(gallivm, "sampler");716image_type = create_jit_image_type(gallivm, "image");717718context_type = create_jit_context_type(gallivm, texture_type, sampler_type,719image_type,720"draw_jit_context");721variant->context_ptr_type = LLVMPointerType(context_type, 0);722723buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");724variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);725726vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");727variant->vb_ptr_type = LLVMPointerType(vb_type, 0);728}729730731static LLVMTypeRef732get_context_ptr_type(struct draw_llvm_variant *variant)733{734if (!variant->context_ptr_type)735create_jit_types(variant);736return variant->context_ptr_type;737}738739740static LLVMTypeRef741get_buffer_ptr_type(struct draw_llvm_variant *variant)742{743if (!variant->buffer_ptr_type)744create_jit_types(variant);745return variant->buffer_ptr_type;746}747748749static LLVMTypeRef750get_vb_ptr_type(struct draw_llvm_variant *variant)751{752if (!variant->vb_ptr_type)753create_jit_types(variant);754return variant->vb_ptr_type;755}756757static LLVMTypeRef758get_vertex_header_ptr_type(struct draw_llvm_variant *variant)759{760if (!variant->vertex_header_ptr_type)761create_jit_types(variant);762return variant->vertex_header_ptr_type;763}764765766/**767* Create per-context LLVM info.768*/769struct draw_llvm *770draw_llvm_create(struct draw_context *draw, LLVMContextRef context)771{772struct draw_llvm *llvm;773774if (!lp_build_init())775return NULL;776777llvm = CALLOC_STRUCT( draw_llvm );778if (!llvm)779return NULL;780781llvm->draw = 
draw;782783llvm->context = context;784if (!llvm->context) {785llvm->context = LLVMContextCreate();786llvm->context_owned = true;787}788if (!llvm->context)789goto fail;790791llvm->nr_variants = 0;792make_empty_list(&llvm->vs_variants_list);793794llvm->nr_gs_variants = 0;795make_empty_list(&llvm->gs_variants_list);796797llvm->nr_tcs_variants = 0;798make_empty_list(&llvm->tcs_variants_list);799800llvm->nr_tes_variants = 0;801make_empty_list(&llvm->tes_variants_list);802803return llvm;804805fail:806draw_llvm_destroy(llvm);807return NULL;808}809810811/**812* Free per-context LLVM info.813*/814void815draw_llvm_destroy(struct draw_llvm *llvm)816{817if (llvm->context_owned)818LLVMContextDispose(llvm->context);819llvm->context = NULL;820821/* XXX free other draw_llvm data? */822FREE(llvm);823}824825static void826draw_get_ir_cache_key(struct nir_shader *nir,827const void *key, size_t key_size,828uint32_t val_32bit,829unsigned char ir_sha1_cache_key[20])830{831struct blob blob = { 0 };832unsigned ir_size;833void *ir_binary;834835blob_init(&blob);836nir_serialize(&blob, nir, true);837ir_binary = blob.data;838ir_size = blob.size;839840struct mesa_sha1 ctx;841_mesa_sha1_init(&ctx);842_mesa_sha1_update(&ctx, key, key_size);843_mesa_sha1_update(&ctx, ir_binary, ir_size);844_mesa_sha1_update(&ctx, &val_32bit, 4);845_mesa_sha1_final(&ctx, ir_sha1_cache_key);846847blob_finish(&blob);848}849850/**851* Create LLVM-generated code for a vertex shader.852*/853struct draw_llvm_variant *854draw_llvm_create_variant(struct draw_llvm *llvm,855unsigned num_inputs,856const struct draw_llvm_variant_key *key)857{858struct draw_llvm_variant *variant;859struct llvm_vertex_shader *shader =860llvm_vertex_shader(llvm->draw->vs.vertex_shader);861LLVMTypeRef vertex_header;862char module_name[64];863unsigned char ir_sha1_cache_key[20];864struct lp_cached_code cached = { 0 };865bool needs_caching = false;866variant = MALLOC(sizeof *variant +867shader->variant_key_size -868sizeof variant->key);869if 
(!variant)870return NULL;871872variant->llvm = llvm;873variant->shader = shader;874memcpy(&variant->key, key, shader->variant_key_size);875876snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",877variant->shader->variants_cached);878879if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {880draw_get_ir_cache_key(shader->base.state.ir.nir,881key,882shader->variant_key_size,883num_inputs,884ir_sha1_cache_key);885886llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,887&cached,888ir_sha1_cache_key);889if (!cached.data_size)890needs_caching = true;891}892variant->gallivm = gallivm_create(module_name, llvm->context, &cached);893894create_jit_types(variant);895896if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {897if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)898tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);899else900nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);901draw_llvm_dump_variant_key(&variant->key);902}903904vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);905906variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);907908draw_llvm_generate(llvm, variant);909910gallivm_compile_module(variant->gallivm);911912variant->jit_func = (draw_jit_vert_func)913gallivm_jit_function(variant->gallivm, variant->function);914915if (needs_caching)916llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,917&cached,918ir_sha1_cache_key);919gallivm_free_ir(variant->gallivm);920921variant->list_item_global.base = variant;922variant->list_item_local.base = variant;923/*variant->no = */shader->variants_created++;924variant->list_item_global.base = variant;925926return variant;927}928929930static void931generate_vs(struct draw_llvm_variant *variant,932LLVMBuilderRef builder,933struct lp_type vs_type,934LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],935const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],936const struct 
lp_bld_tgsi_system_values *system_values,937LLVMValueRef context_ptr,938const struct lp_build_sampler_soa *draw_sampler,939const struct lp_build_image_soa *draw_image,940boolean clamp_vertex_color,941struct lp_build_mask_context *bld_mask)942{943struct draw_llvm *llvm = variant->llvm;944const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;945LLVMValueRef consts_ptr =946draw_jit_context_vs_constants(variant->gallivm, context_ptr);947LLVMValueRef num_consts_ptr =948draw_jit_context_num_vs_constants(variant->gallivm, context_ptr);949LLVMValueRef ssbos_ptr =950draw_jit_context_vs_ssbos(variant->gallivm, context_ptr);951LLVMValueRef num_ssbos_ptr =952draw_jit_context_num_vs_ssbos(variant->gallivm, context_ptr);953954struct lp_build_tgsi_params params;955memset(¶ms, 0, sizeof(params));956957params.type = vs_type;958params.mask = bld_mask;959params.consts_ptr = consts_ptr;960params.const_sizes_ptr = num_consts_ptr;961params.system_values = system_values;962params.inputs = inputs;963params.context_ptr = context_ptr;964params.sampler = draw_sampler;965params.info = &llvm->draw->vs.vertex_shader->info;966params.ssbo_ptr = ssbos_ptr;967params.ssbo_sizes_ptr = num_ssbos_ptr;968params.image = draw_image;969970if (llvm->draw->vs.vertex_shader->state.ir.nir &&971llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR)972lp_build_nir_soa(variant->gallivm,973llvm->draw->vs.vertex_shader->state.ir.nir,974¶ms,975outputs);976else977lp_build_tgsi_soa(variant->gallivm,978tokens,979¶ms,980outputs);981982{983LLVMValueRef out;984unsigned chan, attrib;985struct lp_build_context bld;986struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;987lp_build_context_init(&bld, variant->gallivm, vs_type);988989for (attrib = 0; attrib < info->num_outputs; ++attrib) {990for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {991if (outputs[attrib][chan]) {992switch (info->output_semantic_name[attrib]) {993case TGSI_SEMANTIC_COLOR:994case TGSI_SEMANTIC_BCOLOR:995if 
(clamp_vertex_color) {996out = LLVMBuildLoad(builder, outputs[attrib][chan], "");997out = lp_build_clamp(&bld, out, bld.zero, bld.one);998LLVMBuildStore(builder, out, outputs[attrib][chan]);999}1000break;1001}1002}1003}1004}1005}1006}100710081009static void1010fetch_instanced(struct gallivm_state *gallivm,1011const struct util_format_description *format_desc,1012struct lp_type vs_type,1013LLVMValueRef vb_stride,1014LLVMValueRef map_ptr,1015LLVMValueRef buffer_size_adj,1016LLVMValueRef *inputs,1017LLVMValueRef index)1018{1019LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);1020LLVMTypeRef aosf_t, aosi_t;1021LLVMValueRef zero = LLVMConstNull(i32_t);1022LLVMBuilderRef builder = gallivm->builder;1023LLVMValueRef stride, buffer_overflowed, aos, index_valid;1024unsigned i;10251026aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());1027aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());10281029/* This mul can overflow. Wraparound is ok. */1030stride = LLVMBuildMul(builder, vb_stride, index, "");10311032buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,1033stride, buffer_size_adj,1034"buffer_overflowed");10351036if (0) {1037lp_build_print_value(gallivm, " instance index = ", index);1038lp_build_print_value(gallivm, " buffer overflowed = ", buffer_overflowed);1039}10401041index_valid = LLVMBuildNot(builder, buffer_overflowed, "");1042index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");1043stride = LLVMBuildAnd(builder, stride, index_valid, "");10441045aos = lp_build_fetch_rgba_aos(gallivm,1046format_desc,1047lp_float32_vec4_type(),1048FALSE,1049map_ptr,1050stride, zero, zero,1051NULL);10521053index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);1054aos = LLVMBuildBitCast(builder, aos, aosi_t, "");1055aos = LLVMBuildAnd(builder, aos, index_valid, "");1056aos = LLVMBuildBitCast(builder, aos, aosf_t, "");10571058for (i = 0; i < TGSI_NUM_CHANNELS; i++) {1059LLVMValueRef index = lp_build_const_int32(gallivm, i);1060inputs[i] = 
lp_build_extract_broadcast(gallivm,1061lp_float32_vec4_type(),1062vs_type, aos, index);1063}1064}106510661067static void1068fetch_vector(struct gallivm_state *gallivm,1069const struct util_format_description *format_desc,1070struct lp_type vs_type,1071LLVMValueRef vb_stride,1072LLVMValueRef map_ptr,1073LLVMValueRef buffer_size_adj,1074LLVMValueRef *inputs,1075LLVMValueRef indices)1076{1077LLVMBuilderRef builder = gallivm->builder;1078struct lp_build_context blduivec;1079struct lp_type fetch_type = vs_type;1080LLVMValueRef offset, valid_mask;1081unsigned i;10821083lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));10841085vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);1086buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);10871088/* This mul can overflow. Wraparound is ok. */1089offset = lp_build_mul(&blduivec, vb_stride, indices);10901091valid_mask = lp_build_compare(gallivm, blduivec.type,1092PIPE_FUNC_LESS, offset, buffer_size_adj);10931094/* not valid elements use offset 0 */1095offset = LLVMBuildAnd(builder, offset, valid_mask, "");10961097if (0) {1098lp_build_print_value(gallivm, " indices = ", indices);1099lp_build_print_value(gallivm, " offsets = ", offset);1100lp_build_print_value(gallivm, " valid_mask = ", valid_mask);1101}11021103/*1104* Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.1105* This should always produce better code.1106*/11071108/* The type handling is annoying here... 
*/1109if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&1110format_desc->channel[0].pure_integer) {1111if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {1112fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);1113}1114else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {1115fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);1116}1117}11181119lp_build_fetch_rgba_soa(gallivm, format_desc,1120fetch_type, FALSE, map_ptr, offset,1121blduivec.zero, blduivec.zero,1122NULL, inputs);11231124for (i = 0; i < TGSI_NUM_CHANNELS; i++) {1125inputs[i] = LLVMBuildBitCast(builder, inputs[i],1126lp_build_vec_type(gallivm, vs_type), "");1127}11281129/* out-of-bound fetches return all zeros */1130for (i = 0; i < format_desc->nr_channels; i++) {1131inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");1132inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");1133inputs[i] = LLVMBuildBitCast(builder, inputs[i],1134lp_build_vec_type(gallivm, vs_type), "");1135}1136}113711381139static void1140store_aos(struct gallivm_state *gallivm,1141LLVMValueRef io_ptr,1142LLVMValueRef index,1143LLVMValueRef value)1144{1145LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);1146LLVMBuilderRef builder = gallivm->builder;1147LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);1148LLVMValueRef indices[3];11491150indices[0] = lp_build_const_int32(gallivm, 0);1151indices[1] = index;1152indices[2] = lp_build_const_int32(gallivm, 0);11531154data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");1155data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");11561157#if DEBUG_STORE1158lp_build_printf(gallivm, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);1159#endif11601161/* Unaligned store due to the vertex header */1162LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), 
sizeof(float));1163}11641165/**1166* Adjust the mask to architecture endianess. The mask will the store in struct:1167*1168* struct vertex_header {1169* unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;1170* unsigned edgeflag:1;1171* unsigned pad:1;1172* unsigned vertex_id:16;1173* [...]1174* }1175*1176* On little-endian machine nothing needs to done, however on bit-endian machine1177* the mask's fields need to be adjusted with the algorithm:1178*1179* uint32_t reverse (uint32_t x)1180* {1181* return (x >> 16) | // vertex_id1182* ((x & 0x3fff) << 18) | // clipmask1183* ((x & 0x4000) << 3) | // edgeflag1184* ((x & 0x8000) << 1); // pad1185* }1186*/1187static LLVMValueRef1188adjust_mask(struct gallivm_state *gallivm,1189LLVMValueRef mask)1190{1191#if UTIL_ARCH_BIG_ENDIAN1192LLVMBuilderRef builder = gallivm->builder;1193LLVMValueRef vertex_id;1194LLVMValueRef clipmask;1195LLVMValueRef pad;1196LLVMValueRef edgeflag;11971198vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");1199clipmask = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");1200clipmask = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");1201if (0) {1202pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");1203pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");1204}1205edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");1206edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");12071208mask = LLVMBuildOr(builder, vertex_id, clipmask, "");1209if (0) {1210mask = LLVMBuildOr(builder, mask, pad, "");1211}1212mask = LLVMBuildOr(builder, mask, edgeflag, "");1213#endif1214return mask;1215}12161217static void1218store_aos_array(struct gallivm_state *gallivm,1219struct lp_type soa_type,1220LLVMValueRef io_ptr,1221LLVMValueRef *indices,1222LLVMValueRef* aos,1223int attrib,1224int num_outputs,1225LLVMValueRef clipmask,1226boolean 
need_edgeflag)1227{1228LLVMBuilderRef builder = gallivm->builder;1229LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);1230LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];1231LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];1232LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];1233int vector_length = soa_type.length;1234int i;12351236debug_assert(TGSI_NUM_CHANNELS == 4);12371238for (i = 0; i < vector_length; i++) {1239linear_inds[i] = lp_build_const_int32(gallivm, i);1240if (indices) {1241inds[i] = indices[i];1242} else {1243inds[i] = linear_inds[i];1244}1245io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");1246}12471248if (attrib == 0) {1249/* store vertex header for each of the n vertices */1250LLVMValueRef val, cliptmp;1251int vertex_id_pad_edgeflag;12521253/* If this assertion fails, it means we need to update the bit twidding1254* code here. See struct vertex_header in draw_private.h.1255*/1256assert(DRAW_TOTAL_CLIP_PLANES==14);1257/* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */1258if (!need_edgeflag) {1259vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);1260}1261else {1262vertex_id_pad_edgeflag = (0xffff << 16);1263}1264val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),1265vertex_id_pad_edgeflag);1266/* OR with the clipmask */1267cliptmp = LLVMBuildOr(builder, val, clipmask, "");1268for (i = 0; i < vector_length; i++) {1269LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);1270val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");1271val = adjust_mask(gallivm, val);1272#if DEBUG_STORE1273lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",1274io_ptrs[i], inds[i], val);1275#endif1276LLVMBuildStore(builder, val, id_ptr);1277}1278}12791280/* store for each of the n vertices */1281for (i = 0; i < vector_length; i++) {1282store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);1283}1284}128512861287static void1288convert_to_aos(struct gallivm_state 
*gallivm,1289LLVMValueRef io,1290LLVMValueRef *indices,1291LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],1292LLVMValueRef clipmask,1293int num_outputs,1294struct lp_type soa_type,1295boolean need_edgeflag)1296{1297LLVMBuilderRef builder = gallivm->builder;1298unsigned chan, attrib, i;12991300#if DEBUG_STORE1301lp_build_printf(gallivm, " # storing begin\n");1302#endif1303for (attrib = 0; attrib < num_outputs; ++attrib) {1304LLVMValueRef soa[TGSI_NUM_CHANNELS];1305LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];1306for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {1307if (outputs[attrib][chan]) {1308LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");1309lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);1310#if DEBUG_STORE1311lp_build_printf(gallivm, "output %d : %d ",1312LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),1313attrib, 0),1314LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),1315chan, 0));1316lp_build_print_value(gallivm, "val = ", out);1317{1318LLVMValueRef iv =1319LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");13201321lp_build_print_value(gallivm, " ival = ", iv);1322}1323#endif1324soa[chan] = out;1325}1326else {1327soa[chan] = 0;1328}1329}133013311332if (soa_type.length == TGSI_NUM_CHANNELS) {1333lp_build_transpose_aos(gallivm, soa_type, soa, aos);1334} else {1335lp_build_transpose_aos(gallivm, soa_type, soa, soa);13361337for (i = 0; i < soa_type.length; ++i) {1338aos[i] = lp_build_extract_range(gallivm,1339soa[i % TGSI_NUM_CHANNELS],1340(i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,1341TGSI_NUM_CHANNELS);1342}1343}13441345store_aos_array(gallivm,1346soa_type,1347io, indices,1348aos,1349attrib,1350num_outputs,1351clipmask,1352need_edgeflag);1353}1354#if DEBUG_STORE1355lp_build_printf(gallivm, " # storing end\n");1356#endif1357}135813591360/**1361* Stores original vertex positions in clip coordinates1362*/1363static void1364store_clip(struct gallivm_state *gallivm,1365const struct lp_type 
vs_type,1366LLVMValueRef io_ptr,1367LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],1368int idx)1369{1370LLVMBuilderRef builder = gallivm->builder;1371LLVMValueRef soa[4];1372LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];1373LLVMValueRef indices[2];1374LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];1375LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];1376LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];1377LLVMTypeRef clip_ptr_type =1378LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),13794), 0);1380int i, j;13811382indices[0] =1383indices[1] = lp_build_const_int32(gallivm, 0);13841385for (i = 0; i < vs_type.length; i++) {1386inds[i] = lp_build_const_int32(gallivm, i);1387io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");1388}13891390soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/1391soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/1392soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/1393soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. 
wn*/13941395for (i = 0; i < vs_type.length; i++) {1396clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);1397}13981399lp_build_transpose_aos(gallivm, vs_type, soa, soa);1400for (i = 0; i < vs_type.length; ++i) {1401aos[i] = lp_build_extract_range(gallivm,1402soa[i % TGSI_NUM_CHANNELS],1403(i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,1404TGSI_NUM_CHANNELS);1405}14061407for (j = 0; j < vs_type.length; j++) {1408LLVMValueRef clip_ptr;14091410clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");1411clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");14121413/* Unaligned store */1414LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));1415}1416}141714181419/**1420* Transforms the outputs for viewport mapping1421*/1422static void1423generate_viewport(struct draw_llvm_variant *variant,1424LLVMBuilderRef builder,1425struct lp_type vs_type,1426LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],1427LLVMValueRef context_ptr)1428{1429int i;1430struct gallivm_state *gallivm = variant->gallivm;1431struct lp_type f32_type = vs_type;1432const unsigned pos = variant->llvm->draw->vs.position_output;1433LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);1434LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/1435LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/1436LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);14371438/* We treat pipe_viewport_state as a float array */1439const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);1440const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);14411442/* for 1/w convention*/1443out3 = LLVMBuildFDiv(builder, const1, out3, "");1444LLVMBuildStore(builder, out3, outputs[pos][3]);14451446/* Viewport Mapping */1447for (i=0; i<3; i++) {1448LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. 
xn*/1449LLVMValueRef scale;1450LLVMValueRef trans;1451LLVMValueRef scale_i;1452LLVMValueRef trans_i;1453LLVMValueRef index;14541455index = lp_build_const_int32(gallivm, i + scale_index_offset);1456scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");14571458index = lp_build_const_int32(gallivm, i + trans_index_offset);1459trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");14601461scale = lp_build_broadcast(gallivm, vs_type_llvm,1462LLVMBuildLoad(builder, scale_i, "scale"));1463trans = lp_build_broadcast(gallivm, vs_type_llvm,1464LLVMBuildLoad(builder, trans_i, "trans"));14651466/* divide by w */1467out = LLVMBuildFMul(builder, out, out3, "");1468/* mult by scale, add translation */1469out = lp_build_fmuladd(builder, out, scale, trans);14701471/* store transformed outputs */1472LLVMBuildStore(builder, out, outputs[pos][i]);1473}14741475}147614771478/**1479* Returns clipmask as nxi32 bitmask for the n vertices1480*/1481static LLVMValueRef1482generate_clipmask(struct draw_llvm *llvm,1483struct gallivm_state *gallivm,1484struct lp_type vs_type,1485LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],1486struct draw_llvm_variant_key *key,1487LLVMValueRef context_ptr,1488boolean *have_clipdist)1489{1490LLVMBuilderRef builder = gallivm->builder;1491LLVMValueRef mask; /* stores the <nxi32> clipmasks */1492LLVMValueRef test, temp;1493LLVMValueRef zero, shift;1494LLVMValueRef pos_x, pos_y, pos_z, pos_w;1495LLVMValueRef cv_x, cv_y, cv_z, cv_w;1496LLVMValueRef plane1, planes, plane_ptr, sum;1497struct lp_type f32_type = vs_type;1498struct lp_type i32_type = lp_int_type(vs_type);1499const unsigned pos = llvm->draw->vs.position_output;1500const unsigned cv = llvm->draw->vs.clipvertex_output;1501int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;1502boolean have_cd = false;1503boolean clip_user = key->clip_user;1504unsigned ucp_enable = key->ucp_enable;1505unsigned cd[2];15061507cd[0] = llvm->draw->vs.ccdistance_output[0];1508cd[1] = 
llvm->draw->vs.ccdistance_output[1];15091510if (cd[0] != pos || cd[1] != pos)1511have_cd = true;15121513if (num_written_clipdistance && !clip_user) {1514clip_user = true;1515ucp_enable = (1 << num_written_clipdistance) - 1;1516}15171518mask = lp_build_const_int_vec(gallivm, i32_type, 0);1519temp = lp_build_const_int_vec(gallivm, i32_type, 0);1520zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */1521shift = lp_build_const_int_vec(gallivm, i32_type, 1); /* 1 1 1 1 */15221523/*1524* load clipvertex and position from correct locations.1525* if they are the same just load them once.1526*/1527pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */1528pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */1529pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */1530pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */15311532if (clip_user && cv != pos) {1533cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */1534cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */1535cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */1536cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. 
wn */1537} else {1538cv_x = pos_x;1539cv_y = pos_y;1540cv_z = pos_z;1541cv_w = pos_w;1542}15431544/*1545* Be careful with the comparisons and NaNs (using llvm's unordered1546* comparisons here).1547*/1548/* Cliptest, for hardwired planes */1549/*1550* XXX should take guardband into account (currently not in key).1551* Otherwise might run the draw pipeline stages for nothing.1552*/1553if (key->clip_xy) {1554/* plane 1 */1555test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);1556temp = shift;1557test = LLVMBuildAnd(builder, test, temp, "");1558mask = test;15591560/* plane 2 */1561test = LLVMBuildFAdd(builder, pos_x, pos_w, "");1562test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);1563temp = LLVMBuildShl(builder, temp, shift, "");1564test = LLVMBuildAnd(builder, test, temp, "");1565mask = LLVMBuildOr(builder, mask, test, "");15661567/* plane 3 */1568test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);1569temp = LLVMBuildShl(builder, temp, shift, "");1570test = LLVMBuildAnd(builder, test, temp, "");1571mask = LLVMBuildOr(builder, mask, test, "");15721573/* plane 4 */1574test = LLVMBuildFAdd(builder, pos_y, pos_w, "");1575test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);1576temp = LLVMBuildShl(builder, temp, shift, "");1577test = LLVMBuildAnd(builder, test, temp, "");1578mask = LLVMBuildOr(builder, mask, test, "");1579}15801581if (key->clip_z) {1582temp = lp_build_const_int_vec(gallivm, i32_type, 16);1583if (key->clip_halfz) {1584/* plane 5 */1585test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);1586test = LLVMBuildAnd(builder, test, temp, "");1587mask = LLVMBuildOr(builder, mask, test, "");1588}1589else {1590/* plane 5 */1591test = LLVMBuildFAdd(builder, pos_z, pos_w, "");1592test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);1593test = LLVMBuildAnd(builder, test, temp, "");1594mask = LLVMBuildOr(builder, mask, test, 
"");1595}1596/* plane 6 */1597test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);1598temp = LLVMBuildShl(builder, temp, shift, "");1599test = LLVMBuildAnd(builder, test, temp, "");1600mask = LLVMBuildOr(builder, mask, test, "");1601}16021603if (clip_user) {1604LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);1605LLVMValueRef indices[3];1606LLVMValueRef is_nan_or_inf;16071608/* userclip planes */1609while (ucp_enable) {1610unsigned plane_idx = ffs(ucp_enable)-1;1611ucp_enable &= ~(1 << plane_idx);1612plane_idx += 6;16131614if (have_cd && num_written_clipdistance) {1615LLVMValueRef clipdist;1616int i;1617i = plane_idx - 6;16181619*have_clipdist = TRUE;1620if (i < 4) {1621clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");1622} else {1623clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");1624}1625test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);1626is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);1627test = LLVMBuildOr(builder, test, is_nan_or_inf, "");1628temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);1629test = LLVMBuildAnd(builder, test, temp, "");1630mask = LLVMBuildOr(builder, mask, test, "");1631} else {1632LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);1633indices[0] = lp_build_const_int32(gallivm, 0);1634indices[1] = lp_build_const_int32(gallivm, plane_idx);16351636indices[2] = lp_build_const_int32(gallivm, 0);1637plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");1638plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");1639planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);1640sum = LLVMBuildFMul(builder, planes, cv_x, "");16411642indices[2] = lp_build_const_int32(gallivm, 1);1643plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");1644plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");1645planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);1646sum = lp_build_fmuladd(builder, 
planes, cv_y, sum);16471648indices[2] = lp_build_const_int32(gallivm, 2);1649plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");1650plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");1651planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);1652sum = lp_build_fmuladd(builder, planes, cv_z, sum);16531654indices[2] = lp_build_const_int32(gallivm, 3);1655plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");1656plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");1657planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);1658sum = lp_build_fmuladd(builder, planes, cv_w, sum);16591660test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);1661temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);1662test = LLVMBuildAnd(builder, test, temp, "");1663mask = LLVMBuildOr(builder, mask, test, "");1664}1665}1666}1667if (key->need_edgeflags) {1668/*1669* This isn't really part of clipmask but stored the same in vertex1670* header later, so do it here.1671*/1672unsigned edge_attr = llvm->draw->vs.edgeflag_output;1673LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);1674LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");1675test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);1676temp = lp_build_const_int_vec(gallivm, i32_type,16771LL << DRAW_TOTAL_CLIP_PLANES);1678test = LLVMBuildAnd(builder, test, temp, "");1679mask = LLVMBuildOr(builder, mask, test, "");1680}1681return mask;1682}168316841685/**1686* Returns boolean if any clipping has occurred1687* Used zero/one i8 value to represent boolean1688*/1689static LLVMValueRef1690clipmask_booli8(struct gallivm_state *gallivm,1691const struct lp_type vs_type,1692LLVMValueRef clipmask_bool_ptr,1693boolean edgeflag_in_clipmask)1694{1695LLVMBuilderRef builder = gallivm->builder;1696LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);1697LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, 
"");1698LLVMValueRef ret;1699struct lp_build_context bldivec;17001701lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));17021703/*1704* We need to invert the edgeflag bit from the clipmask here1705* (because the result is really if we want to run the pipeline or not1706* and we (may) need it if edgeflag was 0).1707*/1708if (edgeflag_in_clipmask) {1709LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,17101LL << DRAW_TOTAL_CLIP_PLANES);1711clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");1712}17131714/*1715* XXX: probably should mask off bits from the mask which come from1716* vertices which were beyond the count (i.e. indices_valid for1717* linear fetches, for elts ones we don't have the correct mask1718* right now). Otherwise might run the pipeline for nothing,1719* though everything should still work.1720*/1721ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);1722ret = LLVMBuildZExt(builder, ret, int8_type, "");1723return ret;1724}17251726static LLVMValueRef1727draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,1728struct lp_build_context * bld,1729boolean is_vindex_indirect,1730LLVMValueRef vertex_index,1731boolean is_aindex_indirect,1732LLVMValueRef attrib_index,1733LLVMValueRef swizzle_index)1734{1735const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);1736struct gallivm_state *gallivm = bld->gallivm;1737LLVMBuilderRef builder = gallivm->builder;1738LLVMValueRef indices[3];1739LLVMValueRef res;1740struct lp_type type = bld->type;17411742if (is_vindex_indirect || is_aindex_indirect) {1743int i;1744res = bld->zero;1745for (i = 0; i < type.length; ++i) {1746LLVMValueRef idx = lp_build_const_int32(gallivm, i);1747LLVMValueRef vert_chan_index = vertex_index;1748LLVMValueRef attr_chan_index = attrib_index;1749LLVMValueRef channel_vec, value;17501751if (is_vindex_indirect) {1752vert_chan_index = LLVMBuildExtractElement(builder,1753vertex_index, idx, "");1754}1755if 
(is_aindex_indirect) {1756attr_chan_index = LLVMBuildExtractElement(builder,1757attrib_index, idx, "");1758}17591760indices[0] = vert_chan_index;1761indices[1] = attr_chan_index;1762indices[2] = swizzle_index;17631764channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");1765channel_vec = LLVMBuildLoad(builder, channel_vec, "");1766value = LLVMBuildExtractElement(builder, channel_vec, idx, "");17671768res = LLVMBuildInsertElement(builder, res, value, idx, "");1769}1770} else {1771indices[0] = vertex_index;1772indices[1] = attrib_index;1773indices[2] = swizzle_index;17741775res = LLVMBuildGEP(builder, gs->input, indices, 3, "");1776res = LLVMBuildLoad(builder, res, "");1777}17781779return res;1780}17811782static void1783draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,1784struct lp_build_context * bld,1785LLVMValueRef (*outputs)[4],1786LLVMValueRef emitted_vertices_vec,1787LLVMValueRef mask_vec, LLVMValueRef stream_id)1788{1789const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);1790struct draw_gs_llvm_variant *variant = gs_iface->variant;1791struct gallivm_state *gallivm = variant->gallivm;1792LLVMBuilderRef builder = gallivm->builder;1793struct lp_type gs_type = bld->type;1794LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,1795lp_int_type(gs_type), 0);1796LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];1797LLVMValueRef next_prim_offset =1798lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);1799LLVMValueRef io = variant->io_ptr;1800unsigned i;1801const struct tgsi_shader_info *gs_info = &variant->shader->base.info;18021803LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");1804for (i = 0; i < gs_type.length; ++i) {1805LLVMValueRef ind = lp_build_const_int32(gallivm, i);1806LLVMValueRef currently_emitted =1807LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");1808indices[i] = LLVMBuildMul(builder, ind, 
next_prim_offset, "");1809indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");1810indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],1811lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");1812}18131814LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");1815LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");1816struct lp_build_if_state if_ctx;1817lp_build_if(&if_ctx, gallivm, cnd);1818io = lp_build_pointer_get(builder, io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));18191820convert_to_aos(gallivm, io, indices,1821outputs, clipmask,1822gs_info->num_outputs, gs_type,1823FALSE);1824lp_build_endif(&if_ctx);1825}18261827static void1828draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,1829struct lp_build_context * bld,1830LLVMValueRef total_emitted_vertices_vec_ptr,1831LLVMValueRef verts_per_prim_vec,1832LLVMValueRef emitted_prims_vec,1833LLVMValueRef mask_vec, unsigned stream)1834{1835const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);1836struct draw_gs_llvm_variant *variant = gs_iface->variant;1837struct gallivm_state *gallivm = variant->gallivm;1838LLVMBuilderRef builder = gallivm->builder;1839LLVMValueRef prim_lengts_ptr =1840draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);1841unsigned i;18421843LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");1844for (i = 0; i < bld->type.length; ++i) {1845LLVMValueRef ind = lp_build_const_int32(gallivm, i);1846LLVMValueRef prims_emitted =1847LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");1848LLVMValueRef store_ptr;1849LLVMValueRef num_vertices =1850LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, 
"");18511852LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");1853struct lp_build_if_state ifthen;1854lp_build_if(&ifthen, gallivm, this_cond);1855prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");1856prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");1857store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");1858store_ptr = LLVMBuildLoad(builder, store_ptr, "");1859store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");1860LLVMBuildStore(builder, num_vertices, store_ptr);1861lp_build_endif(&ifthen);1862}1863}18641865static void1866draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,1867LLVMValueRef total_emitted_vertices_vec,1868LLVMValueRef emitted_prims_vec, unsigned stream)1869{1870const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);1871struct draw_gs_llvm_variant *variant = gs_iface->variant;1872struct gallivm_state *gallivm = variant->gallivm;1873LLVMBuilderRef builder = gallivm->builder;1874LLVMValueRef emitted_verts_ptr =1875draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);1876LLVMValueRef emitted_prims_ptr =1877draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);1878LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);18791880emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &stream_val, 1, "");1881emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &stream_val, 1, "");18821883LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);1884LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);1885}18861887static void1888draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)1889{1890struct gallivm_state *gallivm = variant->gallivm;1891LLVMContextRef context = gallivm->context;1892LLVMTypeRef int32_type = 
LLVMInt32TypeInContext(context);1893LLVMTypeRef arg_types[13];1894unsigned num_arg_types = ARRAY_SIZE(arg_types);1895LLVMTypeRef func_type;1896LLVMValueRef context_ptr;1897LLVMBasicBlockRef block;1898LLVMBuilderRef builder;1899char func_name[64];1900struct lp_type vs_type;1901LLVMValueRef count, fetch_elts, start_or_maxelt;1902LLVMValueRef vertex_id_offset;1903LLVMValueRef stride, step, io_itr;1904LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;1905LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;1906LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];1907LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];1908LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];1909LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];1910LLVMValueRef fake_buf_ptr, fake_buf;19111912struct draw_context *draw = llvm->draw;1913const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;1914unsigned i, j;1915struct lp_build_context bld, blduivec;1916struct lp_build_loop_state lp_loop;1917struct lp_build_if_state if_ctx;1918const int vector_length = lp_native_vector_width / 32;1919LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];1920struct lp_build_sampler_soa *sampler = 0;1921struct lp_build_image_soa *image = NULL;1922LLVMValueRef ret, clipmask_bool_ptr;1923struct draw_llvm_variant_key *key = &variant->key;1924/* If geometry shader is present we need to skip both the viewport1925* transformation and clipping otherwise the inputs to the geometry1926* shader will be incorrect.1927* The code can't handle vp transform when vs writes vp index neither1928* (though this would be fixable here, but couldn't just broadcast1929* the values).1930*/1931const boolean bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||1932vs_info->writes_viewport_index;1933const boolean enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||1934key->clip_z ||1935key->clip_user ||1936key->need_edgeflags);1937LLVMValueRef variant_func;1938const unsigned pos = draw->vs.position_output;1939const unsigned cv = 
draw->vs.clipvertex_output;1940boolean have_clipdist = FALSE;1941struct lp_bld_tgsi_system_values system_values;19421943memset(&system_values, 0, sizeof(system_values));1944memset(&outputs, 0, sizeof(outputs));1945snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");19461947i = 0;1948arg_types[i++] = get_context_ptr_type(variant); /* context */1949arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */1950arg_types[i++] = get_buffer_ptr_type(variant); /* vbuffers */1951arg_types[i++] = int32_type; /* count */1952arg_types[i++] = int32_type; /* start/fetch_elt_max */1953arg_types[i++] = int32_type; /* stride */1954arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */1955arg_types[i++] = int32_type; /* instance_id */1956arg_types[i++] = int32_type; /* vertex_id_offset */1957arg_types[i++] = int32_type; /* start_instance */1958arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */1959arg_types[i++] = int32_type; /* draw_id */1960arg_types[i++] = int32_type; /* view_id */19611962func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),1963arg_types, num_arg_types, 0);19641965variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);1966variant->function = variant_func;19671968LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);1969for (i = 0; i < num_arg_types; ++i)1970if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)1971lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);19721973if (gallivm->cache && gallivm->cache->data_size)1974return;1975context_ptr = LLVMGetParam(variant_func, 0);1976io_ptr = LLVMGetParam(variant_func, 1);1977vbuffers_ptr = LLVMGetParam(variant_func, 2);1978count = LLVMGetParam(variant_func, 3);1979/*1980* XXX: the maxelt part is unused. 
Not really useful, since we cannot1981* get index buffer overflows due to vsplit (which provides its own1982* elts buffer, with a different size than what's passed in here).1983*/1984start_or_maxelt = LLVMGetParam(variant_func, 4);1985/*1986* XXX: stride is actually unused. The stride we use is strictly calculated1987* from the number of outputs (including the draw_extra outputs).1988* Should probably fix some day (we need a new vs just because of extra1989* outputs which the generated vs won't touch).1990*/1991stride = LLVMGetParam(variant_func, 5);1992vb_ptr = LLVMGetParam(variant_func, 6);1993system_values.instance_id = LLVMGetParam(variant_func, 7);1994vertex_id_offset = LLVMGetParam(variant_func, 8);1995system_values.base_instance = LLVMGetParam(variant_func, 9);1996fetch_elts = LLVMGetParam(variant_func, 10);1997system_values.draw_id = LLVMGetParam(variant_func, 11);1998system_values.view_index = LLVMGetParam(variant_func, 12);19992000lp_build_name(context_ptr, "context");2001lp_build_name(io_ptr, "io");2002lp_build_name(vbuffers_ptr, "vbuffers");2003lp_build_name(count, "count");2004lp_build_name(start_or_maxelt, "start_or_maxelt");2005lp_build_name(stride, "stride");2006lp_build_name(vb_ptr, "vb");2007lp_build_name(system_values.instance_id, "instance_id");2008lp_build_name(vertex_id_offset, "vertex_id_offset");2009lp_build_name(system_values.base_instance, "start_instance");2010lp_build_name(fetch_elts, "fetch_elts");2011lp_build_name(system_values.draw_id, "draw_id");20122013/*2014* Function body2015*/20162017block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");2018builder = gallivm->builder;2019LLVMPositionBuilderAtEnd(builder, block);20202021memset(&vs_type, 0, sizeof vs_type);2022vs_type.floating = TRUE; /* floating point values */2023vs_type.sign = TRUE; /* values are signed */2024vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */2025vs_type.width = 32; /* 32-bit float */2026vs_type.length = 
vector_length;20272028lp_build_context_init(&bld, gallivm, lp_type_uint(32));2029lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));20302031/* hold temporary "bool" clipmask */2032clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");20332034fake_buf = lp_build_alloca_undef(gallivm,2035LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");2036fake_buf = LLVMBuildBitCast(builder, fake_buf,2037LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");2038fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, "");20392040/* code generated texture sampling */2041sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key), key->nr_samplers);20422043image = draw_llvm_image_soa_create(draw_llvm_variant_key_images(key),2044key->nr_images);20452046step = lp_build_const_int32(gallivm, vector_length);20472048ind_vec = blduivec.undef;2049for (i = 0; i < vs_type.length; i++) {2050LLVMValueRef index = lp_build_const_int32(gallivm, i);2051ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");2052}20532054have_elts = LLVMBuildICmp(builder, LLVMIntNE,2055LLVMConstPointerNull(arg_types[10]), fetch_elts, "");20562057fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");2058fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);2059/*2060* Only needed for non-indexed path.2061*/2062start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);20632064/*2065* Pre-calculate everything which is constant per shader invocation.2066*/2067for (j = 0; j < key->nr_vertex_elements; ++j) {2068LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;2069LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;2070struct pipe_vertex_element *velem = &key->vertex_element[j];2071LLVMValueRef vb_index =2072lp_build_const_int32(gallivm, velem->vertex_buffer_index);2073LLVMValueRef bsize = lp_build_const_int32(gallivm,2074util_format_get_blocksize(velem->src_format));2075LLVMValueRef src_offset = 
lp_build_const_int32(gallivm,2076velem->src_offset);2077struct lp_build_if_state if_ctx;20782079if (velem->src_format != PIPE_FORMAT_NONE) {2080vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");2081vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");2082vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);2083vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j],2084LLVMInt32TypeInContext(context), "");2085vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);2086map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);2087buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);20882089ofbit = NULL;2090/*2091* We'll set buffer_size_adj to zero if we have of, so it will2092* always overflow later automatically without having to keep ofbit.2093* Overflows (with normal wraparound) doing the actual offset2094* calculation should be ok, just not for the buffer size calc.2095* It would also be possible to detect such overflows and return2096* zeros if that happens, but this would be more complex.2097*/2098buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);2099tmp = lp_build_sub(&bld, bsize, bld.one);2100buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,2101&ofbit);2102buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],2103buf_offset, &ofbit);21042105/*2106* We can't easily set fake vertex buffers outside the generated code.2107* Hence, set fake vertex buffers here instead basically, so fetch2108* code can always fetch using offset 0, eliminating all control flow2109* inside the main loop.2110* (Alternatively, could have control flow per vector skipping fetch2111* if ofbit is true.)2112*/2113if (velem->instance_divisor) {2114/*2115* Index is equal to the start instance plus the number of current2116* instance divided by the divisor. 
In this case we compute it as:2117* index = start_instance + (instance_id / divisor).2118* Note we could actually do the fetch here, outside the loop -2119* it's all constant, hopefully llvm recognizes this.2120*/2121LLVMValueRef current_instance;2122current_instance = LLVMBuildUDiv(builder, system_values.instance_id,2123lp_build_const_int32(gallivm,2124velem->instance_divisor),2125"instance_divisor");2126instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,2127current_instance, &ofbit);2128}21292130buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,2131buffer_size_adj[j], "");21322133temp_ptr = lp_build_alloca_undef(gallivm,2134LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");21352136lp_build_if(&if_ctx, gallivm, ofbit);2137{2138LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);2139}2140lp_build_else(&if_ctx);2141{2142map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");2143LLVMBuildStore(builder, map_ptr[j], temp_ptr);2144}2145lp_build_endif(&if_ctx);2146map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");21472148if (0) {2149lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",2150lp_build_const_int32(gallivm, j),2151vb_index, vb_stride[j]);2152lp_build_printf(gallivm,2153" vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",2154vb_buffer_offset, src_offset, buf_offset);2155lp_build_printf(gallivm, " buffer size = %u, blocksize = %u\n",2156buffer_size, bsize);2157lp_build_printf(gallivm, " instance_id = %u\n", system_values.instance_id);2158}2159}2160}21612162lp_build_loop_begin(&lp_loop, gallivm, bld.zero);2163{2164LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];2165LLVMValueRef io;2166LLVMValueRef clipmask; /* holds the clipmask value */2167LLVMValueRef true_index_array, index_store;2168const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];21692170io_itr = lp_loop.counter;21712172io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");2173#if 
DEBUG_STORE2174lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",2175io_itr, io, lp_loop.counter);2176#endif21772178true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);2179true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");21802181LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);2182/*2183* Limit indices to fetch_max, otherwise might try to access indices2184* beyond index buffer (or rather vsplit elt buffer) size.2185* Could probably safely (?) skip this for non-indexed draws and2186* simplify things minimally (by removing it could combine the ind_vec2187* and start_vec adds). I think the only effect for non-indexed draws will2188* be that for the invalid elements they will be all fetched from the2189* same location as the last valid one, but noone should really care.2190*/2191true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);21922193index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");21942195lp_build_if(&if_ctx, gallivm, have_elts);2196{2197/*2198* Note: you'd expect some comparison/clamp against fetch_elt_max2199* here.2200* There used to be one here but it was incorrect: overflow was2201* detected if index > fetch_elt_max - but the correct condition2202* would be index >= fetch_elt_max (since this is just size of elts2203* buffer / element size).2204* Using the correct condition however will cause failures - due to2205* vsplit/vcache code which rebases indices. So, as an example, if2206* fetch_elt_max is just 1 and fetch_count 2, vsplit cache will2207* replace all invalid indices with 0 - which in case of elt_bias2208* not being zero will get a different fetch index than the valid2209* index 0. So, just rely on vsplit code preventing out-of-bounds2210* fetches. 
This is also why it's safe to do elts fetch even if there2211* was no index buffer bound - the real buffer is never seen here, at2212* least not if there are index buffer overflows...2213*/22142215/*2216* XXX should not have to do this, as scale can be handled2217* natively by loads (hits asserts though).2218*/2219tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);2220fetch_elts = LLVMBuildBitCast(builder, fetch_elts,2221LLVMPointerType(LLVMInt8TypeInContext(context),22220), "");2223tmp = lp_build_gather(gallivm, vs_type.length,222432, bld.type, TRUE,2225fetch_elts, tmp, FALSE);2226LLVMBuildStore(builder, tmp, index_store);2227}2228lp_build_else(&if_ctx);2229{2230tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");2231LLVMBuildStore(builder, tmp, index_store);2232}2233lp_build_endif(&if_ctx);22342235true_index_array = LLVMBuildLoad(builder, index_store, "");22362237for (j = 0; j < key->nr_vertex_elements; ++j) {2238struct pipe_vertex_element *velem = &key->vertex_element[j];2239const struct util_format_description *format_desc =2240util_format_description(velem->src_format);22412242if (format_desc->format == PIPE_FORMAT_NONE) {2243for (i = 0; i < TGSI_NUM_CHANNELS; i++) {2244inputs[j][i] = lp_build_zero(gallivm, vs_type);2245}2246}2247else if (velem->instance_divisor) {2248fetch_instanced(gallivm, format_desc, vs_type,2249vb_stride[j], map_ptr[j],2250buffer_size_adj[j],2251inputs[j], instance_index[j]);2252}2253else {2254fetch_vector(gallivm, format_desc, vs_type,2255vb_stride[j], map_ptr[j],2256buffer_size_adj[j],2257inputs[j], true_index_array);2258}2259}22602261struct lp_build_mask_context mask;22622263lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);2264/* In the paths with elts vertex id has to be unaffected by the2265* index bias and because indices inside our elements array have2266* already had index bias applied we need to subtract it here to2267* get back to the original index.2268* in the linear paths vertex id has to be unaffected by 
the2269* original start index and because we abuse the 'start' variable2270* to either represent the actual start index or the index at which2271* the primitive was split (we split rendering into chunks of at2272* most 4095-vertices) we need to back out the original start2273* index out of our vertex id here.2274* for ARB_shader_draw_parameters, base_vertex should be 0 for non-indexed draws.2275*/2276LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));2277system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);2278/* first vertex is for Vulkan base vertex support */2279LLVMValueRef first_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, start_or_maxelt);2280system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);2281system_values.vertex_id = true_index_array;2282system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,2283lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");22842285ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;2286generate_vs(variant,2287builder,2288vs_type,2289outputs,2290ptr_aos,2291&system_values,2292context_ptr,2293sampler,2294image,2295key->clamp_vertex_color,2296&mask);22972298lp_build_mask_end(&mask);2299if (pos != -1 && cv != -1) {2300/* store original positions in clip before further manipulation */2301store_clip(gallivm, vs_type, io, outputs, pos);23022303/* do cliptest */2304if (enable_cliptest) {2305LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");2306/* allocate clipmask, assign it integer type */2307clipmask = generate_clipmask(llvm,2308gallivm,2309vs_type,2310outputs,2311key,2312context_ptr, &have_clipdist);2313temp = LLVMBuildOr(builder, clipmask, temp, "");2314/* store temporary clipping boolean value */2315LLVMBuildStore(builder, temp, clipmask_bool_ptr);2316}2317else {2318clipmask = blduivec.zero;2319}23202321/* do viewport mapping */2322if (!bypass_viewport) 
{2323generate_viewport(variant, builder, vs_type, outputs, context_ptr);2324}2325}2326else {2327clipmask = blduivec.zero;2328}23292330/* store clipmask in vertex header,2331* original positions in clip2332* and transformed positions in data2333*/2334convert_to_aos(gallivm, io, NULL, outputs, clipmask,2335vs_info->num_outputs, vs_type,2336enable_cliptest && key->need_edgeflags);2337}2338lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);23392340sampler->destroy(sampler);2341image->destroy(image);23422343/* return clipping boolean value for function */2344ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,2345enable_cliptest && key->need_edgeflags);23462347LLVMBuildRet(builder, ret);23482349gallivm_verify_function(gallivm, variant_func);2350}235123522353struct draw_llvm_variant_key *2354draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)2355{2356unsigned i;2357struct draw_llvm_variant_key *key;2358struct draw_sampler_static_state *draw_sampler;2359struct draw_image_static_state *draw_image;23602361key = (struct draw_llvm_variant_key *)store;23622363memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));23642365key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/23662367/* will have to rig this up properly later */2368key->clip_xy = llvm->draw->clip_xy;2369key->clip_z = llvm->draw->clip_z;2370key->clip_user = llvm->draw->clip_user;2371key->bypass_viewport = llvm->draw->bypass_viewport;2372key->clip_halfz = llvm->draw->rasterizer->clip_halfz;2373/* XXX assumes edgeflag output not at 0 */2374key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);2375key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;2376key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;2377key->num_outputs = draw_total_vs_outputs(llvm->draw);23782379/* All variants of this shader will have the same value for2380* nr_samplers. 
Not yet trying to compact away holes in the2381* sampler array.2382*/2383key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;2384if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {2385key->nr_sampler_views =2386llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;2387}2388else {2389key->nr_sampler_views = key->nr_samplers;2390}23912392key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;23932394/* Presumably all variants of the shader should have the same2395* number of vertex elements - ie the number of shader inputs.2396* NOTE: we NEED to store the needed number of needed inputs2397* here, not the number of provided elements to match keysize2398* (and the offset of sampler state in the key).2399* If we have excess number of vertex elements, this is valid,2400* but the excess ones don't matter.2401* If we don't have enough vertex elements (which looks not really2402* valid but we'll handle it gracefully) fill out missing ones with2403* zero (we'll recognize these later by PIPE_FORMAT_NONE).2404*/2405key->nr_vertex_elements =2406llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;24072408if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {2409debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",2410key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);2411memset(key->vertex_element, 0,2412sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);2413}2414memcpy(key->vertex_element,2415llvm->draw->pt.vertex_element,2416sizeof(struct pipe_vertex_element) *2417MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));24182419draw_sampler = draw_llvm_variant_key_samplers(key);2420memset(draw_sampler, 0,2421MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);24222423for (i = 0 ; i < key->nr_samplers; i++) 
{2424lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,2425llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);2426}2427for (i = 0 ; i < key->nr_sampler_views; i++) {2428lp_sampler_static_texture_state(&draw_sampler[i].texture_state,2429llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);2430}24312432draw_image = draw_llvm_variant_key_images(key);2433memset(draw_image, 0,2434key->nr_images * sizeof *draw_image);2435for (i = 0; i < key->nr_images; i++) {2436lp_sampler_static_texture_state_image(&draw_image[i].image_state,2437llvm->draw->images[PIPE_SHADER_VERTEX][i]);2438}2439return key;2440}244124422443void2444draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)2445{2446unsigned i;2447struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);2448struct draw_image_static_state *image = draw_llvm_variant_key_images(key);2449debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);2450debug_printf("clip_xy = %u\n", key->clip_xy);2451debug_printf("clip_z = %u\n", key->clip_z);2452debug_printf("clip_user = %u\n", key->clip_user);2453debug_printf("bypass_viewport = %u\n", key->bypass_viewport);2454debug_printf("clip_halfz = %u\n", key->clip_halfz);2455debug_printf("need_edgeflags = %u\n", key->need_edgeflags);2456debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);2457debug_printf("ucp_enable = %u\n", key->ucp_enable);24582459for (i = 0 ; i < key->nr_vertex_elements; i++) {2460debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);2461debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);2462debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);2463debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));2464}24652466for (i = 0 ; i < key->nr_sampler_views; i++) {2467debug_printf("sampler[%i].src_format = %s\n", i, 
util_format_name(sampler[i].texture_state.format));2468}24692470for (i = 0 ; i < key->nr_images; i++)2471debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));2472}247324742475void2476draw_llvm_set_mapped_texture(struct draw_context *draw,2477enum pipe_shader_type shader_stage,2478unsigned sview_idx,2479uint32_t width, uint32_t height, uint32_t depth,2480uint32_t first_level, uint32_t last_level,2481uint32_t num_samples,2482uint32_t sample_stride,2483const void *base_ptr,2484uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],2485uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],2486uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2487{2488unsigned j;2489struct draw_jit_texture *jit_tex;24902491switch (shader_stage) {2492case PIPE_SHADER_VERTEX:2493assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures));2494jit_tex = &draw->llvm->jit_context.textures[sview_idx];2495break;2496case PIPE_SHADER_GEOMETRY:2497assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures));2498jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];2499break;2500case PIPE_SHADER_TESS_CTRL:2501assert(sview_idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.textures));2502jit_tex = &draw->llvm->tcs_jit_context.textures[sview_idx];2503break;2504case PIPE_SHADER_TESS_EVAL:2505assert(sview_idx < ARRAY_SIZE(draw->llvm->tes_jit_context.textures));2506jit_tex = &draw->llvm->tes_jit_context.textures[sview_idx];2507break;2508default:2509assert(0);2510return;2511}25122513jit_tex->width = width;2514jit_tex->height = height;2515jit_tex->depth = depth;2516jit_tex->first_level = first_level;2517jit_tex->last_level = last_level;2518jit_tex->base = base_ptr;2519jit_tex->num_samples = num_samples;2520jit_tex->sample_stride = sample_stride;25212522for (j = first_level; j <= last_level; j++) {2523jit_tex->mip_offsets[j] = mip_offsets[j];2524jit_tex->row_stride[j] = row_stride[j];2525jit_tex->img_stride[j] = 
img_stride[j];2526}2527}25282529void2530draw_llvm_set_mapped_image(struct draw_context *draw,2531enum pipe_shader_type shader_stage,2532unsigned idx,2533uint32_t width, uint32_t height, uint32_t depth,2534const void *base_ptr,2535uint32_t row_stride,2536uint32_t img_stride,2537uint32_t num_samples,2538uint32_t sample_stride)2539{2540struct draw_jit_image *jit_image;25412542switch (shader_stage) {2543case PIPE_SHADER_VERTEX:2544assert(idx < ARRAY_SIZE(draw->llvm->jit_context.images));2545jit_image = &draw->llvm->jit_context.images[idx];2546break;2547case PIPE_SHADER_GEOMETRY:2548assert(idx < ARRAY_SIZE(draw->llvm->gs_jit_context.images));2549jit_image = &draw->llvm->gs_jit_context.images[idx];2550break;2551case PIPE_SHADER_TESS_CTRL:2552assert(idx < ARRAY_SIZE(draw->llvm->tcs_jit_context.images));2553jit_image = &draw->llvm->tcs_jit_context.images[idx];2554break;2555case PIPE_SHADER_TESS_EVAL:2556assert(idx < ARRAY_SIZE(draw->llvm->tes_jit_context.images));2557jit_image = &draw->llvm->tes_jit_context.images[idx];2558break;2559default:2560assert(0);2561return;2562}25632564jit_image->width = width;2565jit_image->height = height;2566jit_image->depth = depth;2567jit_image->base = base_ptr;25682569jit_image->row_stride = row_stride;2570jit_image->img_stride = img_stride;2571jit_image->num_samples = num_samples;2572jit_image->sample_stride = sample_stride;2573}257425752576void2577draw_llvm_set_sampler_state(struct draw_context *draw,2578enum pipe_shader_type shader_type)2579{2580unsigned i;25812582switch (shader_type) {2583case PIPE_SHADER_VERTEX:2584for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {2585struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];25862587if (draw->samplers[PIPE_SHADER_VERTEX][i]) {2588const struct pipe_sampler_state *s2589= draw->samplers[PIPE_SHADER_VERTEX][i];2590jit_sam->min_lod = s->min_lod;2591jit_sam->max_lod = s->max_lod;2592jit_sam->lod_bias = s->lod_bias;2593COPY_4V(jit_sam->border_color, 
s->border_color.f);2594}2595}2596break;2597case PIPE_SHADER_GEOMETRY:2598for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {2599struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];26002601if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) {2602const struct pipe_sampler_state *s2603= draw->samplers[PIPE_SHADER_GEOMETRY][i];2604jit_sam->min_lod = s->min_lod;2605jit_sam->max_lod = s->max_lod;2606jit_sam->lod_bias = s->lod_bias;2607COPY_4V(jit_sam->border_color, s->border_color.f);2608}2609}2610break;2611case PIPE_SHADER_TESS_CTRL:2612for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_CTRL]; i++) {2613struct draw_jit_sampler *jit_sam = &draw->llvm->tcs_jit_context.samplers[i];26142615if (draw->samplers[PIPE_SHADER_TESS_CTRL][i]) {2616const struct pipe_sampler_state *s2617= draw->samplers[PIPE_SHADER_TESS_CTRL][i];2618jit_sam->min_lod = s->min_lod;2619jit_sam->max_lod = s->max_lod;2620jit_sam->lod_bias = s->lod_bias;2621COPY_4V(jit_sam->border_color, s->border_color.f);2622}2623}2624break;2625case PIPE_SHADER_TESS_EVAL:2626for (i = 0; i < draw->num_samplers[PIPE_SHADER_TESS_EVAL]; i++) {2627struct draw_jit_sampler *jit_sam = &draw->llvm->tes_jit_context.samplers[i];26282629if (draw->samplers[PIPE_SHADER_TESS_EVAL][i]) {2630const struct pipe_sampler_state *s2631= draw->samplers[PIPE_SHADER_TESS_EVAL][i];2632jit_sam->min_lod = s->min_lod;2633jit_sam->max_lod = s->max_lod;2634jit_sam->lod_bias = s->lod_bias;2635COPY_4V(jit_sam->border_color, s->border_color.f);2636}2637}2638break;2639default:2640assert(0);2641break;2642}2643}264426452646void2647draw_llvm_destroy_variant(struct draw_llvm_variant *variant)2648{2649struct draw_llvm *llvm = variant->llvm;26502651if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {2652debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",2653variant->shader->variants_cached, 
llvm->nr_variants);2654}26552656gallivm_destroy(variant->gallivm);26572658remove_from_list(&variant->list_item_local);2659variant->shader->variants_cached--;2660remove_from_list(&variant->list_item_global);2661llvm->nr_variants--;2662FREE(variant);2663}266426652666/**2667* Create LLVM types for various structures.2668*/2669static void2670create_gs_jit_types(struct draw_gs_llvm_variant *var)2671{2672struct gallivm_state *gallivm = var->gallivm;2673LLVMTypeRef texture_type, sampler_type, image_type, context_type;26742675texture_type = create_jit_texture_type(gallivm, "texture");2676sampler_type = create_jit_sampler_type(gallivm, "sampler");2677image_type = create_jit_image_type(gallivm, "image");26782679context_type = create_gs_jit_context_type(gallivm,2680var->shader->base.vector_length,2681texture_type, sampler_type,2682image_type,2683"draw_gs_jit_context");2684var->context_ptr_type = LLVMPointerType(context_type, 0);26852686var->input_array_type = create_gs_jit_input_type(gallivm);2687}26882689static LLVMTypeRef2690get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)2691{2692if (!variant->context_ptr_type)2693create_gs_jit_types(variant);2694return variant->context_ptr_type;2695}26962697static LLVMValueRef2698generate_mask_value(struct draw_gs_llvm_variant *variant,2699struct lp_type gs_type)2700{2701struct gallivm_state *gallivm = variant->gallivm;2702LLVMBuilderRef builder = gallivm->builder;2703struct lp_type mask_type = lp_int_type(gs_type);2704LLVMValueRef num_prims;2705LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);2706unsigned i;27072708num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),2709variant->num_prims);2710for (i = 0; i < gs_type.length; i++) {2711LLVMValueRef idx = lp_build_const_int32(gallivm, i);2712mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");2713}2714mask_val = lp_build_compare(gallivm, mask_type,2715PIPE_FUNC_GREATER, num_prims, mask_val);27162717return 
mask_val;2718}27192720static void2721draw_gs_llvm_generate(struct draw_llvm *llvm,2722struct draw_gs_llvm_variant *variant)2723{2724struct gallivm_state *gallivm = variant->gallivm;2725LLVMContextRef context = gallivm->context;2726LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);2727LLVMTypeRef arg_types[8];2728LLVMTypeRef func_type;2729LLVMValueRef variant_func;2730LLVMValueRef context_ptr;2731LLVMValueRef prim_id_ptr;2732LLVMBasicBlockRef block;2733LLVMBuilderRef builder;2734LLVMValueRef io_ptr, input_array, num_prims, mask_val;2735struct lp_build_sampler_soa *sampler = 0;2736struct lp_build_image_soa *image = NULL;2737struct lp_build_context bld;2738struct lp_bld_tgsi_system_values system_values;2739char func_name[64];2740struct lp_type gs_type;2741unsigned i;2742struct draw_gs_llvm_iface gs_iface;2743const struct tgsi_token *tokens = variant->shader->base.state.tokens;2744LLVMValueRef consts_ptr, num_consts_ptr;2745LLVMValueRef ssbos_ptr, num_ssbos_ptr;2746LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];2747struct lp_build_mask_context mask;2748const struct tgsi_shader_info *gs_info = &variant->shader->base.info;2749unsigned vector_length = variant->shader->base.vector_length;27502751memset(&system_values, 0, sizeof(system_values));2752memset(&outputs, 0, sizeof(outputs));27532754snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");27552756assert(variant->vertex_header_ptr_type);27572758arg_types[0] = get_gs_context_ptr_type(variant); /* context */2759arg_types[1] = variant->input_array_type; /* input */2760arg_types[2] = LLVMPointerType(variant->vertex_header_ptr_type, 0); /* vertex_header */2761arg_types[3] = int32_type; /* num_prims */2762arg_types[4] = int32_type; /* instance_id */2763arg_types[5] = LLVMPointerType(2764LLVMVectorType(int32_type, vector_length), 0); /* prim_id_ptr */2765arg_types[6] = int32_type;2766arg_types[7] = int32_type;27672768func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 
0);27692770variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);27712772variant->function = variant_func;27732774LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);27752776for (i = 0; i < ARRAY_SIZE(arg_types); ++i)2777if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)2778lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);27792780if (gallivm->cache && gallivm->cache->data_size)2781return;2782context_ptr = LLVMGetParam(variant_func, 0);2783input_array = LLVMGetParam(variant_func, 1);2784io_ptr = LLVMGetParam(variant_func, 2);2785num_prims = LLVMGetParam(variant_func, 3);2786system_values.instance_id = LLVMGetParam(variant_func, 4);2787prim_id_ptr = LLVMGetParam(variant_func, 5);2788system_values.invocation_id = LLVMGetParam(variant_func, 6);2789system_values.view_index = LLVMGetParam(variant_func, 7);27902791lp_build_name(context_ptr, "context");2792lp_build_name(input_array, "input");2793lp_build_name(io_ptr, "io");2794lp_build_name(num_prims, "num_prims");2795lp_build_name(system_values.instance_id, "instance_id");2796lp_build_name(prim_id_ptr, "prim_id_ptr");2797lp_build_name(system_values.invocation_id, "invocation_id");2798lp_build_name(system_values.view_index, "view_index");27992800variant->context_ptr = context_ptr;2801variant->io_ptr = io_ptr;2802variant->num_prims = num_prims;28032804gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;2805gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;2806gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;2807gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;2808gs_iface.input = input_array;2809gs_iface.variant = variant;28102811/*2812* Function body2813*/28142815block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");2816builder = gallivm->builder;2817LLVMPositionBuilderAtEnd(builder, block);28182819lp_build_context_init(&bld, gallivm, lp_type_int(32));28202821memset(&gs_type, 0, sizeof gs_type);2822gs_type.floating = TRUE; /* floating point values 
*/2823gs_type.sign = TRUE; /* values are signed */2824gs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */2825gs_type.width = 32; /* 32-bit float */2826gs_type.length = vector_length;28272828consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);2829num_consts_ptr =2830draw_gs_jit_context_num_constants(variant->gallivm, context_ptr);28312832ssbos_ptr = draw_gs_jit_context_ssbos(variant->gallivm, context_ptr);2833num_ssbos_ptr =2834draw_gs_jit_context_num_ssbos(variant->gallivm, context_ptr);28352836/* code generated texture sampling */2837sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);2838image = draw_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),2839variant->key.nr_images);2840mask_val = generate_mask_value(variant, gs_type);2841lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);28422843if (gs_info->uses_primid) {2844system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");2845}28462847if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {2848if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)2849tgsi_dump(tokens, 0);2850else2851nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);2852draw_gs_llvm_dump_variant_key(&variant->key);2853}28542855struct lp_build_tgsi_params params;2856memset(¶ms, 0, sizeof(params));28572858params.type = gs_type;2859params.mask = &mask;2860params.consts_ptr = consts_ptr;2861params.const_sizes_ptr = num_consts_ptr;2862params.system_values = &system_values;2863params.context_ptr = context_ptr;2864params.sampler = sampler;2865params.info = &llvm->draw->gs.geometry_shader->info;2866params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;2867params.ssbo_ptr = ssbos_ptr;2868params.ssbo_sizes_ptr = num_ssbos_ptr;2869params.image = image;2870params.gs_vertex_streams = variant->shader->base.num_vertex_streams;28712872if (llvm->draw->gs.geometry_shader->state.type == 
PIPE_SHADER_IR_TGSI)2873lp_build_tgsi_soa(variant->gallivm,2874tokens,2875¶ms,2876outputs);2877else2878lp_build_nir_soa(variant->gallivm,2879llvm->draw->gs.geometry_shader->state.ir.nir,2880¶ms,2881outputs);28822883sampler->destroy(sampler);2884image->destroy(image);28852886lp_build_mask_end(&mask);28872888LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));28892890gallivm_verify_function(gallivm, variant_func);2891}28922893struct draw_gs_llvm_variant *2894draw_gs_llvm_create_variant(struct draw_llvm *llvm,2895unsigned num_outputs,2896const struct draw_gs_llvm_variant_key *key)2897{2898struct draw_gs_llvm_variant *variant;2899struct llvm_geometry_shader *shader =2900llvm_geometry_shader(llvm->draw->gs.geometry_shader);2901LLVMTypeRef vertex_header;2902char module_name[64];2903unsigned char ir_sha1_cache_key[20];2904struct lp_cached_code cached = { 0 };2905bool needs_caching = false;29062907variant = MALLOC(sizeof *variant +2908shader->variant_key_size -2909sizeof variant->key);2910if (!variant)2911return NULL;29122913variant->llvm = llvm;2914variant->shader = shader;29152916snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",2917variant->shader->variants_cached);29182919memcpy(&variant->key, key, shader->variant_key_size);29202921if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {2922draw_get_ir_cache_key(shader->base.state.ir.nir,2923key,2924shader->variant_key_size,2925num_outputs,2926ir_sha1_cache_key);29272928llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,2929&cached,2930ir_sha1_cache_key);2931if (!cached.data_size)2932needs_caching = true;2933}2934variant->gallivm = gallivm_create(module_name, llvm->context, &cached);29352936create_gs_jit_types(variant);29372938vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);29392940variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);29412942draw_gs_llvm_generate(llvm, 
variant);29432944gallivm_compile_module(variant->gallivm);29452946variant->jit_func = (draw_gs_jit_func)2947gallivm_jit_function(variant->gallivm, variant->function);29482949if (needs_caching)2950llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,2951&cached,2952ir_sha1_cache_key);2953gallivm_free_ir(variant->gallivm);29542955variant->list_item_global.base = variant;2956variant->list_item_local.base = variant;2957/*variant->no = */shader->variants_created++;2958variant->list_item_global.base = variant;29592960return variant;2961}29622963void2964draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)2965{2966struct draw_llvm *llvm = variant->llvm;29672968if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {2969debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",2970variant->shader->variants_cached, llvm->nr_gs_variants);2971}29722973gallivm_destroy(variant->gallivm);29742975remove_from_list(&variant->list_item_local);2976variant->shader->variants_cached--;2977remove_from_list(&variant->list_item_global);2978llvm->nr_gs_variants--;2979FREE(variant);2980}29812982struct draw_gs_llvm_variant_key *2983draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)2984{2985unsigned i;2986struct draw_gs_llvm_variant_key *key;2987struct draw_sampler_static_state *draw_sampler;2988struct draw_image_static_state *draw_image;29892990key = (struct draw_gs_llvm_variant_key *)store;29912992memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));29932994key->num_outputs = draw_total_gs_outputs(llvm->draw);29952996/* All variants of this shader will have the same value for2997* nr_samplers. 
Not yet trying to compact away holes in the2998* sampler array.2999*/3000key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;3001if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {3002key->nr_sampler_views =3003llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;3004}3005else {3006key->nr_sampler_views = key->nr_samplers;3007}30083009key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;30103011draw_sampler = key->samplers;30123013memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);30143015for (i = 0 ; i < key->nr_samplers; i++) {3016lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,3017llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);3018}3019for (i = 0 ; i < key->nr_sampler_views; i++) {3020lp_sampler_static_texture_state(&draw_sampler[i].texture_state,3021llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);3022}30233024draw_image = draw_gs_llvm_variant_key_images(key);3025memset(draw_image, 0,3026key->nr_images * sizeof *draw_image);3027for (i = 0; i < key->nr_images; i++) {3028lp_sampler_static_texture_state_image(&draw_image[i].image_state,3029llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);3030}3031return key;3032}30333034void3035draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)3036{3037unsigned i;3038struct draw_sampler_static_state *sampler = key->samplers;3039struct draw_image_static_state *image = draw_gs_llvm_variant_key_images(key);3040for (i = 0 ; i < key->nr_sampler_views; i++) {3041debug_printf("sampler[%i].src_format = %s\n", i,3042util_format_name(sampler[i].texture_state.format));3043}30443045for (i = 0 ; i < key->nr_images; i++)3046debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));30473048}30493050static void3051create_tcs_jit_types(struct draw_tcs_llvm_variant *var)3052{3053struct gallivm_state *gallivm = 
var->gallivm;
   LLVMTypeRef texture_type, sampler_type, image_type, context_type;

   texture_type = create_jit_texture_type(gallivm, "texture");
   sampler_type = create_jit_sampler_type(gallivm, "sampler");
   image_type = create_jit_image_type(gallivm, "image");

   context_type = create_tcs_jit_context_type(gallivm,
                                              0,
                                              texture_type, sampler_type,
                                              image_type,
                                              "draw_tcs_jit_context");
   /* Cache the LLVM types on the variant for use by the code generator. */
   var->input_array_type = create_tcs_jit_input_type(gallivm);
   var->output_array_type = create_tcs_jit_output_type(gallivm);
   var->context_ptr_type = LLVMPointerType(context_type, 0);
}

/**
 * Return the TCS JIT context pointer type, creating the variant's JIT types
 * first if context_ptr_type is not set yet.
 */
static LLVMTypeRef
get_tcs_context_ptr_type(struct draw_tcs_llvm_variant *variant)
{
   if (!variant->context_ptr_type)
      create_tcs_jit_types(variant);
   return variant->context_ptr_type;
}

/**
 * Emit IR that fetches one TCS input channel from tcs->input, addressed as
 * [vertex_index][attrib_index][swizzle_index].
 *
 * If any of the three indices is flagged indirect, that index is treated as
 * a per-lane vector: one GEP + load is emitted per lane and the scalars are
 * assembled into the result vector.  Otherwise a single scalar is loaded and
 * broadcast to all lanes.
 *
 * \return the fetched value as a vector of bld->type
 */
static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
                               struct lp_build_context *bld,
                               boolean is_vindex_indirect,
                               LLVMValueRef vertex_index,
                               boolean is_aindex_indirect,
                               LLVMValueRef attrib_index,
                               boolean is_sindex_indirect,
                               LLVMValueRef swizzle_index)
{
   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;

   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
      int i;

      res = bld->zero;
      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         /* Non-indirect indices are used as-is for every lane. */
         LLVMValueRef vert_chan_index = vertex_index;
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef swiz_chan_index = swizzle_index;
         LLVMValueRef channel_vec;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }
         if (is_sindex_indirect) {
            swiz_chan_index = LLVMBuildExtractElement(builder,
                                                      swizzle_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swiz_chan_index;

         channel_vec = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
         channel_vec = LLVMBuildLoad(builder, channel_vec, "");

         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
      }
   } else {
      indices[0] = vertex_index;
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP(builder, tcs->input, indices, 3, "");
      res = LLVMBuildLoad(builder, res, "");
      res = lp_build_broadcast_scalar(bld, res);
   }
   return res;
}

/**
 * Emit IR that fetches one TCS output channel from tcs->output, addressed as
 * [vertex_index][attrib_index][swizzle_index].
 *
 * Same structure as the input fetch: per-lane GEP + load when any index is
 * indirect, otherwise one scalar load broadcast to all lanes.  In the direct
 * path a NULL vertex_index is replaced by constant 0.  The \p name parameter
 * is not referenced in this function.
 *
 * \return the fetched value as a vector of bld->type
 */
static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
                                struct lp_build_context *bld,
                                boolean is_vindex_indirect,
                                LLVMValueRef vertex_index,
                                boolean is_aindex_indirect,
                                LLVMValueRef attrib_index,
                                boolean is_sindex_indirect,
                                LLVMValueRef swizzle_index,
                                uint32_t name)
{
   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;

   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
      int i;

      res = bld->zero;
      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef vert_chan_index = vertex_index;
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef swiz_chan_index = swizzle_index;
         LLVMValueRef channel_vec;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }
         if (is_sindex_indirect) {
            swiz_chan_index = LLVMBuildExtractElement(builder,
                                                      swizzle_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swiz_chan_index;

         channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
         channel_vec = LLVMBuildLoad(builder, channel_vec, "");

         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
      }
   } else {
      indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");
      res = LLVMBuildLoad(builder, res, "");
      res = lp_build_broadcast_scalar(bld, res);
   }
   return res;
}

/**
 * Emit IR that stores \p value into tcs->output at
 * [vertex_index][attrib_index][swizzle_index], one lane at a time.
 *
 * Each lane's store is wrapped in a conditional so that it only executes
 * when the matching lane of \p mask_vec is non-zero.  A NULL vertex_index is
 * replaced by constant 0.  The \p name parameter is not referenced here.
 */
static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
                                struct lp_build_context *bld,
                                unsigned name,
                                boolean is_vindex_indirect,
                                LLVMValueRef vertex_index,
                                boolean is_aindex_indirect,
                                LLVMValueRef attrib_index,
                                boolean is_sindex_indirect,
                                LLVMValueRef swizzle_index,
                                LLVMValueRef value,
                                LLVMValueRef mask_vec)
{
   const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;

   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
      int i;

      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef swiz_chan_index = swizzle_index;
         LLVMValueRef channel_vec;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }

         if (is_sindex_indirect) {
            swiz_chan_index = LLVMBuildExtractElement(builder,
                                                      swizzle_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swiz_chan_index;

         channel_vec = LLVMBuildGEP(builder, tcs->output, indices, 3, "");

         res = LLVMBuildExtractElement(builder, value, idx, "");

         /* Only store this lane if its execution-mask entry is non-zero. */
         struct lp_build_if_state ifthen;
         LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
         lp_build_if(&ifthen, gallivm, cond);
         LLVMBuildStore(builder, res, channel_vec);
         lp_build_endif(&ifthen);
      }
   } else {
      indices[0] = vertex_index ?
vertex_index : lp_build_const_int32(gallivm, 0);3264indices[1] = attrib_index;3265indices[2] = swizzle_index;32663267res = LLVMBuildGEP(builder, tcs->output, indices, 3, "");3268for (unsigned i = 0; i < type.length; ++i) {3269LLVMValueRef idx = lp_build_const_int32(gallivm, i);3270LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");32713272struct lp_build_if_state ifthen;3273LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");3274cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");3275lp_build_if(&ifthen, gallivm, cond);3276LLVMBuildStore(builder, val, res);3277lp_build_endif(&ifthen);3278}3279}3280}328132823283static LLVMValueRef3284generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,3285struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)3286{3287struct gallivm_state *gallivm = variant->gallivm;3288LLVMBuilderRef builder = gallivm->builder;3289struct lp_type mask_type = lp_int_type(tcs_type);3290LLVMValueRef num_vecs;3291LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);3292unsigned i;32933294num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);3295for (i = 0; i < tcs_type.length; i++) {3296LLVMValueRef idx = lp_build_const_int32(gallivm, i);3297mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");3298}3299mask_val = lp_build_compare(gallivm, mask_type,3300PIPE_FUNC_GREATER, num_vecs, mask_val);33013302return mask_val;3303}33043305static void3306draw_tcs_llvm_generate(struct draw_llvm *llvm,3307struct draw_tcs_llvm_variant *variant)3308{3309struct gallivm_state *gallivm = variant->gallivm;3310LLVMContextRef context = gallivm->context;3311LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);3312LLVMTypeRef arg_types[7];3313LLVMTypeRef func_type, coro_func_type;3314LLVMValueRef variant_func, variant_coro;3315LLVMValueRef 
context_ptr;3316LLVMValueRef view_index;3317LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;3318LLVMValueRef mask_val;3319LLVMBasicBlockRef block;3320LLVMBuilderRef builder;3321struct lp_build_context bld, bldvec;3322struct lp_build_sampler_soa *sampler = 0;3323struct lp_build_image_soa *image = NULL;3324struct lp_bld_tgsi_system_values system_values;3325char func_name[64], func_name_coro[64];3326unsigned i;3327struct draw_tcs_llvm_iface tcs_iface;3328struct lp_build_mask_context mask;3329LLVMValueRef consts_ptr, num_consts_ptr;3330LLVMValueRef ssbos_ptr, num_ssbos_ptr;3331struct lp_type tcs_type;3332unsigned vector_length = variant->shader->base.vector_length;33333334memset(&system_values, 0, sizeof(system_values));33353336snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");33373338snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");33393340arg_types[0] = get_tcs_context_ptr_type(variant); /* context */3341arg_types[1] = variant->input_array_type; /* input */3342arg_types[2] = variant->output_array_type;3343arg_types[3] = int32_type;3344arg_types[4] = int32_type;3345arg_types[5] = int32_type;3346arg_types[6] = int32_type; /* coroutine only */33473348func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);33493350coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);33513352variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);33533354variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);33553356variant->function = variant_func;3357LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);33583359LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);33603361for (i = 0; i < ARRAY_SIZE(arg_types); ++i) {3362if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {3363lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);3364lp_add_function_attr(variant_func, i 
+ 1, LP_FUNC_ATTR_NOALIAS);3365}3366}33673368if (gallivm->cache && gallivm->cache->data_size)3369return;3370context_ptr = LLVMGetParam(variant_func, 0);3371input_array = LLVMGetParam(variant_func, 1);3372output_array = LLVMGetParam(variant_func, 2);3373prim_id = LLVMGetParam(variant_func, 3);3374patch_vertices_in = LLVMGetParam(variant_func, 4);3375view_index = LLVMGetParam(variant_func, 5);33763377lp_build_name(context_ptr, "context");3378lp_build_name(input_array, "input");3379lp_build_name(output_array, "output");3380lp_build_name(prim_id, "prim_id");3381lp_build_name(patch_vertices_in, "patch_vertices_in");3382lp_build_name(view_index, "view_index");33833384block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");3385builder = gallivm->builder;3386LLVMPositionBuilderAtEnd(builder, block);33873388lp_build_context_init(&bld, gallivm, lp_type_int(32));33893390memset(&tcs_type, 0, sizeof tcs_type);3391tcs_type.floating = TRUE; /* floating point values */3392tcs_type.sign = TRUE; /* values are signed */3393tcs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */3394tcs_type.width = 32; /* 32-bit float */3395tcs_type.length = vector_length;33963397lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));33983399LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);3400LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);34013402struct lp_build_loop_state loop_state[2];3403LLVMValueRef num_inner_loop;3404unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);3405num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);3406LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);3407LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");3408unsigned end_coroutine = INT_MAX;3409lp_build_loop_begin(&loop_state[1], 
gallivm,3410lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */3411lp_build_loop_begin(&loop_state[0], gallivm,3412lp_build_const_int32(gallivm, 0)); /* inner loop */3413{3414LLVMValueRef args[7];3415args[0] = context_ptr;3416args[1] = input_array;3417args[2] = output_array;3418args[3] = prim_id;3419args[4] = patch_vertices_in;3420args[5] = view_index;3421args[6] = loop_state[0].counter;3422LLVMValueRef coro_entry = LLVMBuildGEP(builder, coro_hdls, &loop_state[0].counter, 1, "");3423LLVMValueRef coro_hdl = LLVMBuildLoad(builder, coro_entry, "coro_hdl");34243425struct lp_build_if_state ifstate;3426LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,3427lp_build_const_int32(gallivm, 0), "");3428/* first time here - call the coroutine function entry point */3429lp_build_if(&ifstate, gallivm, cmp);3430LLVMValueRef coro_ret = LLVMBuildCall(builder, variant_coro, args, 7, "");3431LLVMBuildStore(builder, coro_ret, coro_entry);3432lp_build_else(&ifstate);3433/* subsequent calls for this invocation - check if done. 
*/3434LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);3435struct lp_build_if_state ifstate2;3436lp_build_if(&ifstate2, gallivm, coro_done);3437/* if done destroy and force loop exit */3438lp_build_coro_destroy(gallivm, coro_hdl);3439lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));3440lp_build_else(&ifstate2);3441/* otherwise resume the coroutine */3442lp_build_coro_resume(gallivm, coro_hdl);3443lp_build_endif(&ifstate2);3444lp_build_endif(&ifstate);3445lp_build_loop_force_reload_counter(&loop_state[1]);3446}3447lp_build_loop_end_cond(&loop_state[0],3448num_inner_loop,3449NULL, LLVMIntUGE);3450lp_build_loop_end_cond(&loop_state[1],3451lp_build_const_int32(gallivm, end_coroutine),3452NULL, LLVMIntEQ);3453LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));34543455block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");3456LLVMPositionBuilderAtEnd(builder, block);34573458context_ptr = LLVMGetParam(variant_coro, 0);3459input_array = LLVMGetParam(variant_coro, 1);3460output_array = LLVMGetParam(variant_coro, 2);3461prim_id = LLVMGetParam(variant_coro, 3);3462patch_vertices_in = LLVMGetParam(variant_coro, 4);3463view_index = LLVMGetParam(variant_coro, 5);34643465consts_ptr = draw_tcs_jit_context_constants(variant->gallivm, context_ptr);3466num_consts_ptr =3467draw_tcs_jit_context_num_constants(variant->gallivm, context_ptr);34683469ssbos_ptr = draw_tcs_jit_context_ssbos(variant->gallivm, context_ptr);3470num_ssbos_ptr =3471draw_tcs_jit_context_num_ssbos(variant->gallivm, context_ptr);3472sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);3473image = draw_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),3474variant->key.nr_images);34753476LLVMValueRef counter = LLVMGetParam(variant_coro, 6);3477LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));3478for (i = 0; i < vector_length; i++) 
{3479LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), lp_build_const_int32(gallivm, i), "");3480invocvec = LLVMBuildInsertElement(builder, invocvec, idx, idx, "");3481}34823483system_values.invocation_id = invocvec;3484system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);3485system_values.view_index = view_index;3486system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);3487tcs_iface.input = input_array;3488tcs_iface.output = output_array;3489tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;3490tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;3491tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;349234933494{3495LLVMValueRef coro_id = lp_build_coro_id(gallivm);3496LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);34973498mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));3499lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);35003501struct lp_build_coro_suspend_info coro_info;35023503LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");3504LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");35053506coro_info.suspend = sus_block;3507coro_info.cleanup = clean_block;35083509struct lp_build_tgsi_params params;3510memset(¶ms, 0, sizeof(params));35113512params.type = tcs_type;3513params.mask = &mask;3514params.consts_ptr = consts_ptr;3515params.const_sizes_ptr = num_consts_ptr;3516params.system_values = &system_values;3517params.context_ptr = context_ptr;3518params.sampler = sampler;3519params.info = &llvm->draw->tcs.tess_ctrl_shader->info;3520params.ssbo_ptr = ssbos_ptr;3521params.ssbo_sizes_ptr = num_ssbos_ptr;3522params.image = image;3523params.coro = &coro_info;3524params.tcs_iface = 
&tcs_iface.base;35253526lp_build_nir_soa(variant->gallivm,3527llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,3528¶ms, NULL);35293530lp_build_mask_end(&mask);35313532lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);3533LLVMPositionBuilderAtEnd(builder, clean_block);35343535lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);35363537LLVMBuildBr(builder, sus_block);3538LLVMPositionBuilderAtEnd(builder, sus_block);35393540lp_build_coro_end(gallivm, coro_hdl);3541LLVMBuildRet(builder, coro_hdl);3542}35433544sampler->destroy(sampler);3545image->destroy(image);3546gallivm_verify_function(gallivm, variant_func);3547gallivm_verify_function(gallivm, variant_coro);3548}35493550struct draw_tcs_llvm_variant *3551draw_tcs_llvm_create_variant(struct draw_llvm *llvm,3552unsigned num_outputs,3553const struct draw_tcs_llvm_variant_key *key)3554{3555struct draw_tcs_llvm_variant *variant;3556struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);3557char module_name[64];3558unsigned char ir_sha1_cache_key[20];3559struct lp_cached_code cached = { 0 };3560bool needs_caching = false;35613562variant = MALLOC(sizeof *variant +3563shader->variant_key_size - sizeof variant->key);3564if (!variant)3565return NULL;35663567variant->llvm = llvm;3568variant->shader = shader;35693570snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",3571variant->shader->variants_cached);35723573memcpy(&variant->key, key, shader->variant_key_size);35743575if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {3576draw_get_ir_cache_key(shader->base.state.ir.nir,3577key,3578shader->variant_key_size,3579num_outputs,3580ir_sha1_cache_key);35813582llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,3583&cached,3584ir_sha1_cache_key);3585if (!cached.data_size)3586needs_caching = true;3587}35883589variant->gallivm = gallivm_create(module_name, llvm->context, &cached);35903591create_tcs_jit_types(variant);35923593if (gallivm_debug & 
(GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {3594nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);3595draw_tcs_llvm_dump_variant_key(&variant->key);3596}35973598lp_build_coro_declare_malloc_hooks(variant->gallivm);3599draw_tcs_llvm_generate(llvm, variant);36003601gallivm_compile_module(variant->gallivm);36023603lp_build_coro_add_malloc_hooks(variant->gallivm);3604variant->jit_func = (draw_tcs_jit_func)3605gallivm_jit_function(variant->gallivm, variant->function);36063607if (needs_caching)3608llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,3609&cached,3610ir_sha1_cache_key);3611gallivm_free_ir(variant->gallivm);36123613variant->list_item_global.base = variant;3614variant->list_item_local.base = variant;3615/*variant->no = */shader->variants_created++;3616variant->list_item_global.base = variant;36173618return variant;3619}36203621void3622draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)3623{3624struct draw_llvm *llvm = variant->llvm;36253626if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {3627debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",3628variant->shader->variants_cached, llvm->nr_tcs_variants);3629}36303631gallivm_destroy(variant->gallivm);36323633remove_from_list(&variant->list_item_local);3634variant->shader->variants_cached--;3635remove_from_list(&variant->list_item_global);3636llvm->nr_tcs_variants--;3637FREE(variant);3638}36393640struct draw_tcs_llvm_variant_key *3641draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)3642{3643unsigned i;3644struct draw_tcs_llvm_variant_key *key;3645struct draw_sampler_static_state *draw_sampler;3646struct draw_image_static_state *draw_image;36473648key = (struct draw_tcs_llvm_variant_key *)store;36493650memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));36513652/* All variants of this shader will have the same value for3653* nr_samplers. 
Not yet trying to compact away holes in the3654* sampler array.3655*/3656key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;3657if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {3658key->nr_sampler_views =3659llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;3660}3661else {3662key->nr_sampler_views = key->nr_samplers;3663}36643665key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;36663667draw_sampler = key->samplers;36683669memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);36703671for (i = 0 ; i < key->nr_samplers; i++) {3672lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,3673llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);3674}3675for (i = 0 ; i < key->nr_sampler_views; i++) {3676lp_sampler_static_texture_state(&draw_sampler[i].texture_state,3677llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);3678}36793680draw_image = draw_tcs_llvm_variant_key_images(key);3681memset(draw_image, 0,3682key->nr_images * sizeof *draw_image);3683for (i = 0; i < key->nr_images; i++) {3684lp_sampler_static_texture_state_image(&draw_image[i].image_state,3685llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);3686}3687return key;3688}36893690void3691draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)3692{3693unsigned i;3694struct draw_sampler_static_state *sampler = key->samplers;3695struct draw_image_static_state *image = draw_tcs_llvm_variant_key_images(key);3696for (i = 0 ; i < key->nr_sampler_views; i++) {3697debug_printf("sampler[%i].src_format = %s\n", i,3698util_format_name(sampler[i].texture_state.format));3699}37003701for (i = 0 ; i < key->nr_images; i++)3702debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));37033704}37053706static void3707create_tes_jit_types(struct draw_tes_llvm_variant *var)3708{3709struct gallivm_state *gallivm = 
var->gallivm;
   LLVMTypeRef texture_type, sampler_type, image_type, context_type;

   texture_type = create_jit_texture_type(gallivm, "texture");
   sampler_type = create_jit_sampler_type(gallivm, "sampler");
   image_type = create_jit_image_type(gallivm, "image");

   context_type = create_tes_jit_context_type(gallivm,
                                              0,
                                              texture_type, sampler_type,
                                              image_type,
                                              "draw_tes_jit_context");
   /* Cache the LLVM types on the variant for use by the code generator. */
   var->context_ptr_type = LLVMPointerType(context_type, 0);

   var->input_array_type = create_tes_jit_input_type(gallivm);
}

/**
 * Return the TES JIT context pointer type, creating the variant's JIT types
 * first if context_ptr_type is not set yet.
 */
static LLVMTypeRef
get_tes_context_ptr_type(struct draw_tes_llvm_variant *variant)
{
   if (!variant->context_ptr_type)
      create_tes_jit_types(variant);
   return variant->context_ptr_type;
}

/**
 * Build a per-lane execution mask for the TES.
 *
 * Lane i holds the value (loop_counter + i); the returned mask is the result
 * of comparing "limit > lane value" (PIPE_FUNC_GREATER) lane by lane.
 *
 * \param tes_type      float vector type of the shader; the mask uses its
 *                      integer equivalent
 * \param limit         scalar upper bound, broadcast to all lanes
 * \param loop_counter  scalar value assigned to lane 0
 */
static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
                        struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
{
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type mask_type = lp_int_type(tes_type);
   LLVMValueRef num_prims;
   LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
   unsigned i;

   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
   for (i = 0; i < tes_type.length; i++) {
      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
      mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
   }
   mask_val = lp_build_compare(gallivm, mask_type,
                               PIPE_FUNC_GREATER, num_prims, mask_val);

   return mask_val;
}

/**
 * Emit IR that fetches one per-vertex TES input channel from tes->input,
 * addressed as [vertex_index][attrib_index][swizzle_index].
 *
 * If any of the three indices is flagged indirect, that index is treated as
 * a per-lane vector: one GEP + load is emitted per lane and the scalars are
 * assembled into the result vector.  Otherwise a single scalar is loaded and
 * broadcast to all lanes.
 *
 * \return the fetched value as a vector of bld->type
 */
static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
                                 struct lp_build_context *bld,
                                 boolean is_vindex_indirect,
                                 LLVMValueRef vertex_index,
                                 boolean is_aindex_indirect,
                                 LLVMValueRef attrib_index,
                                 boolean is_sindex_indirect,
                                 LLVMValueRef swizzle_index)
{
   const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;

   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
      int i;

      res = bld->zero;

      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         /* Non-indirect indices are used as-is for every lane. */
         LLVMValueRef vert_chan_index = vertex_index;
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef swiz_chan_index = swizzle_index;
         LLVMValueRef channel_vec;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }
         if (is_sindex_indirect) {
            swiz_chan_index = LLVMBuildExtractElement(builder,
                                                      swizzle_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swiz_chan_index;

         channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
         channel_vec = LLVMBuildLoad(builder, channel_vec, "");

         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
      }
   } else {
      indices[0] = vertex_index;
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
      res = LLVMBuildLoad(builder, res, "");
      res = lp_build_broadcast_scalar(bld, res);
   }
   return res;
}

/**
 * Emit IR that fetches one per-patch TES input channel from tes->input.
 *
 * The first index is always constant 0; only the attribute index may be
 * indirect (per-lane).  In the indirect case one GEP + load is emitted per
 * lane, otherwise a single scalar is loaded and broadcast to all lanes.
 *
 * \return the fetched value as a vector of bld->type
 */
static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
                                struct lp_build_context *bld,
                                boolean is_aindex_indirect,
                                LLVMValueRef attrib_index,
                                LLVMValueRef swizzle_index)
{
   const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;

   if (is_aindex_indirect) {
      int i;

      res = bld->zero;

      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef channel_vec;

         /* Always true here: this branch is only entered when indirect. */
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }

         indices[0] = lp_build_const_int32(gallivm, 0);
         indices[1] = attr_chan_index;
         indices[2] = swizzle_index;

         channel_vec = LLVMBuildGEP(builder, tes->input, indices, 3, "");
         channel_vec = LLVMBuildLoad(builder, channel_vec, "");

         res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
      }
   } else {
      indices[0] = lp_build_const_int32(gallivm, 0);
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP(builder, tes->input, indices, 3, "");
      res = LLVMBuildLoad(builder, res, "");
      res = lp_build_broadcast_scalar(bld, res);
   }
   return res;
}

static void
draw_tes_llvm_generate(struct draw_llvm *llvm,
                       struct draw_tes_llvm_variant *variant)
{
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMContextRef context = gallivm->context;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
   LLVMTypeRef arg_types[11];
   LLVMTypeRef func_type;
   LLVMValueRef variant_func;
   LLVMValueRef context_ptr;
   LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
   LLVMValueRef view_index;
   LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   LLVMValueRef mask_val;
   struct lp_build_context bld, bldvec;
   struct lp_build_sampler_soa *sampler = 0;
   struct lp_build_image_soa *image = NULL;
   struct lp_bld_tgsi_system_values system_values;
   char func_name[64];
   unsigned i;
   struct draw_tes_llvm_iface tes_iface;
   LLVMValueRef
outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];3895struct lp_build_mask_context mask;3896LLVMValueRef consts_ptr, num_consts_ptr;3897LLVMValueRef ssbos_ptr, num_ssbos_ptr;3898LLVMValueRef step;3899struct lp_type tes_type;3900unsigned vector_length = variant->shader->base.vector_length;39013902memset(&system_values, 0, sizeof(system_values));3903memset(&outputs, 0, sizeof(outputs));39043905snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");39063907arg_types[0] = get_tes_context_ptr_type(variant); /* context */3908arg_types[1] = variant->input_array_type; /* input */3909arg_types[2] = variant->vertex_header_ptr_type;3910arg_types[3] = int32_type;3911arg_types[4] = int32_type;3912arg_types[5] = LLVMPointerType(flt_type, 0);3913arg_types[6] = LLVMPointerType(flt_type, 0);3914arg_types[7] = LLVMPointerType(LLVMArrayType(flt_type, 4), 0);3915arg_types[8] = LLVMPointerType(LLVMArrayType(flt_type, 2), 0);3916arg_types[9] = int32_type;3917arg_types[10] = int32_type;39183919func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);3920variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);39213922variant->function = variant_func;3923LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);39243925for (i = 0; i < ARRAY_SIZE(arg_types); ++i)3926if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)3927lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);39283929if (gallivm->cache && gallivm->cache->data_size)3930return;3931context_ptr = LLVMGetParam(variant_func, 0);3932input_array = LLVMGetParam(variant_func, 1);3933io_ptr = LLVMGetParam(variant_func, 2);3934prim_id = LLVMGetParam(variant_func, 3);3935num_tess_coord = LLVMGetParam(variant_func, 4);3936tess_coord[0] = LLVMGetParam(variant_func, 5);3937tess_coord[1] = LLVMGetParam(variant_func, 6);3938tess_outer = LLVMGetParam(variant_func, 7);3939tess_inner = LLVMGetParam(variant_func, 8);3940patch_vertices_in = LLVMGetParam(variant_func, 9);3941view_index = 
LLVMGetParam(variant_func, 10);39423943lp_build_name(context_ptr, "context");3944lp_build_name(input_array, "input");3945lp_build_name(io_ptr, "io");3946lp_build_name(prim_id, "prim_id");3947lp_build_name(num_tess_coord, "num_tess_coord");3948lp_build_name(tess_coord[0], "tess_coord[0]");3949lp_build_name(tess_coord[1], "tess_coord[1]");3950lp_build_name(tess_outer, "tess_outer");3951lp_build_name(tess_inner, "tess_inner");3952lp_build_name(patch_vertices_in, "patch_vertices_in");3953lp_build_name(view_index, "view_index");39543955tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;3956tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;3957tes_iface.input = input_array;3958tes_iface.variant = variant;39593960block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");3961builder = gallivm->builder;3962LLVMPositionBuilderAtEnd(builder, block);39633964lp_build_context_init(&bld, gallivm, lp_type_int(32));39653966memset(&tes_type, 0, sizeof tes_type);3967tes_type.floating = TRUE; /* floating point values */3968tes_type.sign = TRUE; /* values are signed */3969tes_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */3970tes_type.width = 32; /* 32-bit float */3971tes_type.length = vector_length;39723973lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));3974consts_ptr = draw_tes_jit_context_constants(variant->gallivm, context_ptr);3975num_consts_ptr =3976draw_tes_jit_context_num_constants(variant->gallivm, context_ptr);39773978ssbos_ptr = draw_tes_jit_context_ssbos(variant->gallivm, context_ptr);3979num_ssbos_ptr =3980draw_tes_jit_context_num_ssbos(variant->gallivm, context_ptr);3981sampler = draw_llvm_sampler_soa_create(variant->key.samplers, variant->key.nr_samplers);3982image = draw_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),3983variant->key.nr_images);3984step = lp_build_const_int32(gallivm, vector_length);39853986system_values.tess_outer = 
LLVMBuildLoad(builder, tess_outer, "");3987system_values.tess_inner = LLVMBuildLoad(builder, tess_inner, "");39883989system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);39903991system_values.view_index = view_index;39923993system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);39943995if (variant->key.primid_needed) {3996int slot = variant->key.primid_output;3997for (unsigned i = 0; i < 4; i++) {3998outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");3999LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);4000}4001}4002struct lp_build_loop_state lp_loop;4003lp_build_loop_begin(&lp_loop, gallivm, bld.zero);4004{4005LLVMValueRef io;40064007io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");4008mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);4009lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);40104011system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));4012for (i = 0; i < 3; i++) {4013LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));4014for (unsigned j = 0; j < vector_length; j++) {4015LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");4016LLVMValueRef tc_val;4017if (i == 2) {4018if (variant->shader->base.prim_mode == PIPE_PRIM_TRIANGLES) {4019tc_val = lp_build_const_float(gallivm, 1.0);4020tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[0], idx), "");4021tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get(builder, tess_coord[1], idx), "");4022} else4023tc_val = lp_build_const_float(gallivm, 0.0);4024} else4025tc_val = lp_build_pointer_get(builder, tess_coord[i], idx);40264027tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");4028}4029system_values.tess_coord = 
LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");4030}40314032struct lp_build_tgsi_params params;4033memset(¶ms, 0, sizeof(params));40344035params.type = tes_type;4036params.mask = &mask;4037params.consts_ptr = consts_ptr;4038params.const_sizes_ptr = num_consts_ptr;4039params.system_values = &system_values;4040params.context_ptr = context_ptr;4041params.sampler = sampler;4042params.info = &llvm->draw->tes.tess_eval_shader->info;4043params.ssbo_ptr = ssbos_ptr;4044params.ssbo_sizes_ptr = num_ssbos_ptr;4045params.image = image;4046params.tes_iface = &tes_iface.base;40474048lp_build_nir_soa(variant->gallivm,4049llvm->draw->tes.tess_eval_shader->state.ir.nir,4050¶ms,4051outputs);40524053lp_build_mask_end(&mask);4054LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,4055lp_int_type(tes_type), 0);40564057convert_to_aos(gallivm, io, NULL, outputs, clipmask,4058draw_total_tes_outputs(llvm->draw), tes_type, FALSE);4059}4060lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);4061sampler->destroy(sampler);4062image->destroy(image);40634064LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));4065gallivm_verify_function(gallivm, variant_func);4066}40674068struct draw_tes_llvm_variant *4069draw_tes_llvm_create_variant(struct draw_llvm *llvm,4070unsigned num_outputs,4071const struct draw_tes_llvm_variant_key *key)4072{4073struct draw_tes_llvm_variant *variant;4074struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);4075LLVMTypeRef vertex_header;4076char module_name[64];4077unsigned char ir_sha1_cache_key[20];4078struct lp_cached_code cached = { 0 };4079bool needs_caching = false;40804081variant = MALLOC(sizeof *variant +4082shader->variant_key_size - sizeof variant->key);4083if (!variant)4084return NULL;40854086variant->llvm = llvm;4087variant->shader = shader;40884089snprintf(module_name, sizeof(module_name), 
"draw_llvm_tes_variant%u",4090variant->shader->variants_cached);40914092memcpy(&variant->key, key, shader->variant_key_size);4093if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {4094draw_get_ir_cache_key(shader->base.state.ir.nir,4095key,4096shader->variant_key_size,4097num_outputs,4098ir_sha1_cache_key);40994100llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,4101&cached,4102ir_sha1_cache_key);4103if (!cached.data_size)4104needs_caching = true;4105}4106variant->gallivm = gallivm_create(module_name, llvm->context, &cached);41074108create_tes_jit_types(variant);41094110vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);41114112variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);41134114if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {4115nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);4116draw_tes_llvm_dump_variant_key(&variant->key);4117}41184119draw_tes_llvm_generate(llvm, variant);41204121gallivm_compile_module(variant->gallivm);41224123variant->jit_func = (draw_tes_jit_func)4124gallivm_jit_function(variant->gallivm, variant->function);41254126if (needs_caching)4127llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,4128&cached,4129ir_sha1_cache_key);4130gallivm_free_ir(variant->gallivm);41314132variant->list_item_global.base = variant;4133variant->list_item_local.base = variant;4134/*variant->no = */shader->variants_created++;4135variant->list_item_global.base = variant;41364137return variant;4138}41394140void4141draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)4142{4143struct draw_llvm *llvm = variant->llvm;41444145if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {4146debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",4147variant->shader->variants_cached, 
llvm->nr_tes_variants);4148}41494150gallivm_destroy(variant->gallivm);41514152remove_from_list(&variant->list_item_local);4153variant->shader->variants_cached--;4154remove_from_list(&variant->list_item_global);4155llvm->nr_tes_variants--;4156FREE(variant);4157}41584159struct draw_tes_llvm_variant_key *4160draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)4161{4162unsigned i;4163struct draw_tes_llvm_variant_key *key;4164struct draw_sampler_static_state *draw_sampler;4165struct draw_image_static_state *draw_image;41664167key = (struct draw_tes_llvm_variant_key *)store;41684169memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));41704171int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);4172if (primid_output >= 0) {4173key->primid_output = primid_output;4174key->primid_needed = true;4175}41764177/* All variants of this shader will have the same value for4178* nr_samplers. Not yet trying to compact away holes in the4179* sampler array.4180*/4181key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;4182if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {4183key->nr_sampler_views =4184llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;4185}4186else {4187key->nr_sampler_views = key->nr_samplers;4188}41894190key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;41914192draw_sampler = key->samplers;41934194memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);41954196for (i = 0 ; i < key->nr_samplers; i++) {4197lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,4198llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);4199}4200for (i = 0 ; i < key->nr_sampler_views; i++) {4201lp_sampler_static_texture_state(&draw_sampler[i].texture_state,4202llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);4203}42044205draw_image = 
draw_tes_llvm_variant_key_images(key);4206memset(draw_image, 0,4207key->nr_images * sizeof *draw_image);4208for (i = 0; i < key->nr_images; i++) {4209lp_sampler_static_texture_state_image(&draw_image[i].image_state,4210llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);4211}4212return key;4213}42144215void4216draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)4217{4218unsigned i;4219struct draw_sampler_static_state *sampler = key->samplers;4220struct draw_image_static_state *image = draw_tes_llvm_variant_key_images(key);42214222if (key->primid_needed)4223debug_printf("prim id output %d\n", key->primid_output);4224for (i = 0 ; i < key->nr_sampler_views; i++) {4225debug_printf("sampler[%i].src_format = %s\n", i,4226util_format_name(sampler[i].texture_state.format));4227}42284229for (i = 0 ; i < key->nr_images; i++)4230debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));42314232}423342344235