Path: blob/21.2-virgl/src/gallium/drivers/llvmpipe/lp_bld_interp.c
4570 views
/**************************************************************************1*2* Copyright 2009 VMware, Inc.3* Copyright 2007-2008 VMware, Inc.4* All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining a7* copy of this software and associated documentation files (the8* "Software"), to deal in the Software without restriction, including9* without limitation the rights to use, copy, modify, merge, publish,10* distribute, sub license, and/or sell copies of the Software, and to11* permit persons to whom the Software is furnished to do so, subject to12* the following conditions:13*14* The above copyright notice and this permission notice (including the15* next paragraph) shall be included in all copies or substantial portions16* of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS19* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF20* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.21* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR22* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,23* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE24* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.25*26**************************************************************************/2728/**29* @file30* Position and shader input interpolation.31*32* @author Jose Fonseca <[email protected]>33*/3435#include "pipe/p_shader_tokens.h"36#include "util/compiler.h"37#include "util/u_debug.h"38#include "util/u_memory.h"39#include "util/u_math.h"40#include "tgsi/tgsi_scan.h"41#include "gallivm/lp_bld_debug.h"42#include "gallivm/lp_bld_const.h"43#include "gallivm/lp_bld_arit.h"44#include "gallivm/lp_bld_swizzle.h"45#include "gallivm/lp_bld_flow.h"46#include "gallivm/lp_bld_logic.h"47#include "gallivm/lp_bld_struct.h"48#include "gallivm/lp_bld_gather.h"49#include "lp_bld_interp.h"505152/*53* The shader JIT function operates on blocks of quads.54* Each block has 2x2 quads and each quad has 2x2 pixels.55*56* We iterate over the quads in order 0, 1, 2, 3:57*58* #################59* # | # | #60* #---0---#---1---#61* # | # | #62* #################63* # | # | #64* #---2---#---3---#65* # | # | #66* #################67*68* If we iterate over multiple quads at once, quads 01 and 23 are processed69* together.70*71* Within each quad, we have four pixels which are represented in SOA72* order:73*74* #########75* # 0 | 1 #76* #---+---#77* # 2 | 3 #78* #########79*80* So the green channel (for example) of the four pixels is stored in81* a single vector register: {g0, g1, g2, g3}.82* The order stays the same even with multiple quads:83* 0 1 4 584* 2 3 6 785* is stored as g0..g786*/878889/**90* Do one perspective divide per quad.91*92* For perspective interpolation, the final attribute value is given93*94* a' = a/w = a * oow95*96* where97*98* a = a0 + dadx*x + dady*y99* w = w0 + dwdx*x + dwdy*y100* oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)101*102* Instead of computing the division per pixel, with this macro we compute the103* division on the upper left pixel of each quad, and use a linear104* approximation in the remaining pixels, given by:105*106* da'dx = (dadx - dwdx*a)*oow107* da'dy = (dady - dwdy*a)*oow108*109* Ironically, this actually makes things slower -- probably because the110* divide hardware unit is rarely used, whereas the multiply unit is typically111* already saturated.112*/113#define PERSPECTIVE_DIVIDE_PER_QUAD 0114115116static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};117static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};118119120static void121attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)122{123if(attrib == 0)124lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);125else126lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);127}128129static void130calc_offsets(struct lp_build_context *coeff_bld,131unsigned quad_start_index,132LLVMValueRef *pixoffx,133LLVMValueRef *pixoffy)134{135unsigned i;136unsigned num_pix = coeff_bld->type.length;137struct gallivm_state *gallivm = coeff_bld->gallivm;138LLVMBuilderRef builder = coeff_bld->gallivm->builder;139LLVMValueRef nr, pixxf, pixyf;140141*pixoffx = coeff_bld->undef;142*pixoffy = coeff_bld->undef;143144for (i = 0; i < num_pix; i++) {145nr = lp_build_const_int32(gallivm, i);146pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +147(quad_start_index & 1) * 2);148pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +149(quad_start_index & 2));150*pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");151*pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");152}153}154155static void156calc_centroid_offsets(struct lp_build_interp_soa_context *bld,157struct gallivm_state *gallivm,158LLVMValueRef loop_iter,159LLVMValueRef mask_store,160LLVMValueRef pix_center_offset,161LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)162{163struct lp_build_context *coeff_bld = &bld->coeff_bld;164LLVMBuilderRef builder = gallivm->builder;165LLVMValueRef s_mask_and = NULL;166LLVMValueRef centroid_x_offset = pix_center_offset;167LLVMValueRef centroid_y_offset = pix_center_offset;168for (int s = bld->coverage_samples - 1; s >= 0; s--) {169LLVMValueRef sample_cov;170LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");171172s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");173sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);174if (s == bld->coverage_samples - 1)175s_mask_and = sample_cov;176else177s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");178179LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);180LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);181182x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);183y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);184x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);185y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);186centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);187centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);188}189*centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);190*centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);191}192193/* Much easier, and significantly less instructions in the per-stamp194* part (less than half) but overall more instructions so a loss if195* most quads are active. Might be a win though with larger vectors.196* No ability to do per-quad divide (doable but not implemented)197* Could be made to work with passed in pixel offsets (i.e. active quad merging).198*/199static void200coeffs_init_simple(struct lp_build_interp_soa_context *bld,201LLVMValueRef a0_ptr,202LLVMValueRef dadx_ptr,203LLVMValueRef dady_ptr)204{205struct lp_build_context *coeff_bld = &bld->coeff_bld;206struct lp_build_context *setup_bld = &bld->setup_bld;207struct gallivm_state *gallivm = coeff_bld->gallivm;208LLVMBuilderRef builder = gallivm->builder;209unsigned attrib;210211for (attrib = 0; attrib < bld->num_attribs; ++attrib) {212/*213* always fetch all 4 values for performance/simplicity214* Note: we do that here because it seems to generate better215* code. It generates a lot of moves initially but less216* moves later. As far as I can tell this looks like a217* llvm issue, instead of simply reloading the values from218* the passed in pointers it if it runs out of registers219* it spills/reloads them. Maybe some optimization passes220* would help.221* Might want to investigate this again later.222*/223const unsigned interp = bld->interp[attrib];224LLVMValueRef index = lp_build_const_int32(gallivm,225attrib * TGSI_NUM_CHANNELS);226LLVMValueRef ptr;227LLVMValueRef dadxaos = setup_bld->zero;228LLVMValueRef dadyaos = setup_bld->zero;229LLVMValueRef a0aos = setup_bld->zero;230231switch (interp) {232case LP_INTERP_PERSPECTIVE:233FALLTHROUGH;234235case LP_INTERP_LINEAR:236ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");237ptr = LLVMBuildBitCast(builder, ptr,238LLVMPointerType(setup_bld->vec_type, 0), "");239dadxaos = LLVMBuildLoad(builder, ptr, "");240241ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");242ptr = LLVMBuildBitCast(builder, ptr,243LLVMPointerType(setup_bld->vec_type, 0), "");244dadyaos = LLVMBuildLoad(builder, ptr, "");245246attrib_name(dadxaos, attrib, 0, ".dadxaos");247attrib_name(dadyaos, attrib, 0, ".dadyaos");248FALLTHROUGH;249250case LP_INTERP_CONSTANT:251case LP_INTERP_FACING:252ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");253ptr = LLVMBuildBitCast(builder, ptr,254LLVMPointerType(setup_bld->vec_type, 0), "");255a0aos = LLVMBuildLoad(builder, ptr, "");256attrib_name(a0aos, attrib, 0, ".a0aos");257break;258259case LP_INTERP_POSITION:260/* Nothing to do as the position coeffs are already setup in slot 0 */261continue;262263default:264assert(0);265break;266}267bld->a0aos[attrib] = a0aos;268bld->dadxaos[attrib] = dadxaos;269bld->dadyaos[attrib] = dadyaos;270}271}272273/**274* Interpolate the shader input attribute values.275* This is called for each (group of) quad(s).276*/277static void278attribs_update_simple(struct lp_build_interp_soa_context *bld,279struct gallivm_state *gallivm,280LLVMValueRef loop_iter,281LLVMValueRef mask_store,282LLVMValueRef sample_id,283int start,284int end)285{286LLVMBuilderRef builder = gallivm->builder;287struct lp_build_context *coeff_bld = &bld->coeff_bld;288struct lp_build_context *setup_bld = &bld->setup_bld;289LLVMValueRef oow = NULL;290unsigned attrib;291LLVMValueRef pixoffx;292LLVMValueRef pixoffy;293LLVMValueRef ptr;294LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);295296/* could do this with code-generated passed in pixel offsets too */297298assert(loop_iter);299ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");300pixoffx = LLVMBuildLoad(builder, ptr, "");301ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");302pixoffy = LLVMBuildLoad(builder, ptr, "");303304pixoffx = LLVMBuildFAdd(builder, pixoffx,305lp_build_broadcast_scalar(coeff_bld, bld->x), "");306pixoffy = LLVMBuildFAdd(builder, pixoffy,307lp_build_broadcast_scalar(coeff_bld, bld->y), "");308309for (attrib = start; attrib < end; attrib++) {310const unsigned mask = bld->mask[attrib];311const unsigned interp = bld->interp[attrib];312const unsigned loc = bld->interp_loc[attrib];313unsigned chan;314315for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {316if (mask & (1 << chan)) {317LLVMValueRef index;318LLVMValueRef dadx = coeff_bld->zero;319LLVMValueRef dady = coeff_bld->zero;320LLVMValueRef a = coeff_bld->zero;321LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;322323index = lp_build_const_int32(gallivm, chan);324switch (interp) {325case LP_INTERP_PERSPECTIVE:326FALLTHROUGH;327328case LP_INTERP_LINEAR:329if (attrib == 0 && chan == 0) {330dadx = coeff_bld->one;331if (sample_id) {332LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");333x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);334a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);335} else {336a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);337}338}339else if (attrib == 0 && chan == 1) {340dady = coeff_bld->one;341if (sample_id) {342LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");343y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");344y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);345a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);346} else {347a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);348}349}350else {351dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,352coeff_bld->type, bld->dadxaos[attrib],353index);354dady = lp_build_extract_broadcast(gallivm, setup_bld->type,355coeff_bld->type, bld->dadyaos[attrib],356index);357a = lp_build_extract_broadcast(gallivm, setup_bld->type,358coeff_bld->type, bld->a0aos[attrib],359index);360361if (bld->coverage_samples > 1) {362LLVMValueRef xoffset = pix_center_offset;363LLVMValueRef yoffset = pix_center_offset;364if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {365LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");366LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");367368x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);369y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);370xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);371yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);372} else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {373calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,374pix_center_offset, &xoffset, &yoffset);375}376chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);377chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);378}379}380381/*382* a = a0 + (x * dadx + y * dady)383*/384a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);385a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);386387if (interp == LP_INTERP_PERSPECTIVE) {388if (oow == NULL) {389LLVMValueRef w = bld->attribs[0][3];390assert(attrib != 0);391assert(bld->mask[0] & TGSI_WRITEMASK_W);392oow = lp_build_rcp(coeff_bld, w);393}394a = lp_build_mul(coeff_bld, a, oow);395}396break;397398case LP_INTERP_CONSTANT:399case LP_INTERP_FACING:400a = lp_build_extract_broadcast(gallivm, setup_bld->type,401coeff_bld->type, bld->a0aos[attrib],402index);403break;404405case LP_INTERP_POSITION:406assert(attrib > 0);407a = bld->attribs[0][chan];408break;409410default:411assert(0);412break;413}414415if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){416/* FIXME: Depth values can exceed 1.0, due to the fact that417* setup interpolation coefficients refer to (0,0) which causes418* precision loss. So we must clamp to 1.0 here to avoid artifacts.419* Note though values outside [0,1] are perfectly valid with420* depth clip disabled.421* XXX: If depth clip is disabled but we force depth clamp422* we may get values larger than 1.0 in the fs (but not in423* depth test). Not sure if that's an issue...424* Also, on a similar note, it is not obvious if the depth values425* appearing in fs (with depth clip disabled) should be clamped426* to [0,1], clamped to near/far or not be clamped at all...427*/428a = lp_build_min(coeff_bld, a, coeff_bld->one);429}430bld->attribs[attrib][chan] = a;431}432}433}434}435436static LLVMValueRef437lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,438struct gallivm_state *gallivm,439unsigned attrib, unsigned chan,440LLVMValueRef indir_index,441LLVMValueRef pixoffx,442LLVMValueRef pixoffy)443{444LLVMBuilderRef builder = gallivm->builder;445struct lp_build_context *coeff_bld = &bld->coeff_bld;446const unsigned interp = bld->interp[attrib];447LLVMValueRef dadx = coeff_bld->zero;448LLVMValueRef dady = coeff_bld->zero;449LLVMValueRef a = coeff_bld->zero;450451LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);452453indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");454LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");455index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");456457/* size up to byte indices */458index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");459460struct lp_type dst_type = coeff_bld->type;461dst_type.length = 1;462switch (interp) {463case LP_INTERP_PERSPECTIVE:464FALLTHROUGH;465case LP_INTERP_LINEAR:466467dadx = lp_build_gather(gallivm, coeff_bld->type.length,468coeff_bld->type.width, dst_type,469true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);470471dady = lp_build_gather(gallivm, coeff_bld->type.length,472coeff_bld->type.width, dst_type,473true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);474475a = lp_build_gather(gallivm, coeff_bld->type.length,476coeff_bld->type.width, dst_type,477true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);478479/*480* a = a0 + (x * dadx + y * dady)481*/482a = lp_build_fmuladd(builder, dadx, pixoffx, a);483a = lp_build_fmuladd(builder, dady, pixoffy, a);484485if (interp == LP_INTERP_PERSPECTIVE) {486LLVMValueRef w = bld->attribs[0][3];487assert(attrib != 0);488assert(bld->mask[0] & TGSI_WRITEMASK_W);489LLVMValueRef oow = lp_build_rcp(coeff_bld, w);490a = lp_build_mul(coeff_bld, a, oow);491}492493break;494case LP_INTERP_CONSTANT:495case LP_INTERP_FACING:496a = lp_build_gather(gallivm, coeff_bld->type.length,497coeff_bld->type.width, dst_type,498true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);499break;500default:501assert(0);502break;503}504return a;505}506507LLVMValueRef508lp_build_interp_soa(struct lp_build_interp_soa_context *bld,509struct gallivm_state *gallivm,510LLVMValueRef loop_iter,511LLVMValueRef mask_store,512unsigned attrib, unsigned chan,513unsigned loc,514LLVMValueRef indir_index,515LLVMValueRef offsets[2])516{517LLVMBuilderRef builder = gallivm->builder;518struct lp_build_context *coeff_bld = &bld->coeff_bld;519struct lp_build_context *setup_bld = &bld->setup_bld;520LLVMValueRef pixoffx;521LLVMValueRef pixoffy;522LLVMValueRef ptr;523524/* could do this with code-generated passed in pixel offsets too */525526assert(loop_iter);527ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");528pixoffx = LLVMBuildLoad(builder, ptr, "");529ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");530pixoffy = LLVMBuildLoad(builder, ptr, "");531532pixoffx = LLVMBuildFAdd(builder, pixoffx,533lp_build_broadcast_scalar(coeff_bld, bld->x), "");534pixoffy = LLVMBuildFAdd(builder, pixoffy,535lp_build_broadcast_scalar(coeff_bld, bld->y), "");536537LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, 0.5);538539if (loc == TGSI_INTERPOLATE_LOC_CENTER) {540if (bld->coverage_samples > 1) {541pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");542pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");543}544545if (offsets[0])546pixoffx = LLVMBuildFAdd(builder, pixoffx,547offsets[0], "");548if (offsets[1])549pixoffy = LLVMBuildFAdd(builder, pixoffy,550offsets[1], "");551} else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {552LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");553LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");554555LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,556LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");557LLVMValueRef xoffset = lp_build_gather(gallivm,558bld->coeff_bld.type.length,559bld->coeff_bld.type.width,560lp_elem_type(bld->coeff_bld.type),561false,562base_ptr,563x_val_idx, true);564LLVMValueRef yoffset = lp_build_gather(gallivm,565bld->coeff_bld.type.length,566bld->coeff_bld.type.width,567lp_elem_type(bld->coeff_bld.type),568false,569base_ptr,570y_val_idx, true);571572if (bld->coverage_samples > 1) {573pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");574pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");575}576} else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {577LLVMValueRef centroid_x_offset, centroid_y_offset;578579/* for centroid find covered samples for this quad. */580/* if all samples are covered use pixel centers */581if (bld->coverage_samples > 1) {582calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,583pix_center_offset, ¢roid_x_offset, ¢roid_y_offset);584585pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");586pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");587}588}589590// remap attrib properly.591attrib++;592593if (indir_index)594return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,595indir_index, pixoffx, pixoffy);596597598const unsigned interp = bld->interp[attrib];599LLVMValueRef dadx = coeff_bld->zero;600LLVMValueRef dady = coeff_bld->zero;601LLVMValueRef a = coeff_bld->zero;602603LLVMValueRef index = lp_build_const_int32(gallivm, chan);604605switch (interp) {606case LP_INTERP_PERSPECTIVE:607FALLTHROUGH;608case LP_INTERP_LINEAR:609dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,610coeff_bld->type, bld->dadxaos[attrib],611index);612613dady = lp_build_extract_broadcast(gallivm, setup_bld->type,614coeff_bld->type, bld->dadyaos[attrib],615index);616617a = lp_build_extract_broadcast(gallivm, setup_bld->type,618coeff_bld->type, bld->a0aos[attrib],619index);620621/*622* a = a0 + (x * dadx + y * dady)623*/624a = lp_build_fmuladd(builder, dadx, pixoffx, a);625a = lp_build_fmuladd(builder, dady, pixoffy, a);626627if (interp == LP_INTERP_PERSPECTIVE) {628LLVMValueRef w = bld->attribs[0][3];629assert(attrib != 0);630assert(bld->mask[0] & TGSI_WRITEMASK_W);631LLVMValueRef oow = lp_build_rcp(coeff_bld, w);632a = lp_build_mul(coeff_bld, a, oow);633}634635break;636case LP_INTERP_CONSTANT:637case LP_INTERP_FACING:638a = lp_build_extract_broadcast(gallivm, setup_bld->type,639coeff_bld->type, bld->a0aos[attrib],640index);641break;642default:643assert(0);644break;645}646return a;647}648649/**650* Generate the position vectors.651*652* Parameter x0, y0 are the integer values with upper left coordinates.653*/654static void655pos_init(struct lp_build_interp_soa_context *bld,656LLVMValueRef x0,657LLVMValueRef y0)658{659LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;660struct lp_build_context *coeff_bld = &bld->coeff_bld;661662bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");663bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");664}665666667/**668* Initialize fragment shader input attribute info.669*/670void671lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,672struct gallivm_state *gallivm,673unsigned num_inputs,674const struct lp_shader_input *inputs,675boolean pixel_center_integer,676unsigned coverage_samples,677LLVMValueRef sample_pos_array,678LLVMValueRef num_loop,679boolean depth_clamp,680LLVMBuilderRef builder,681struct lp_type type,682LLVMValueRef a0_ptr,683LLVMValueRef dadx_ptr,684LLVMValueRef dady_ptr,685LLVMValueRef x0,686LLVMValueRef y0)687{688struct lp_type coeff_type;689struct lp_type setup_type;690unsigned attrib;691unsigned chan;692693memset(bld, 0, sizeof *bld);694695memset(&coeff_type, 0, sizeof coeff_type);696coeff_type.floating = TRUE;697coeff_type.sign = TRUE;698coeff_type.width = 32;699coeff_type.length = type.length;700701memset(&setup_type, 0, sizeof setup_type);702setup_type.floating = TRUE;703setup_type.sign = TRUE;704setup_type.width = 32;705setup_type.length = TGSI_NUM_CHANNELS;706707708/* XXX: we don't support interpolating into any other types */709assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);710711lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);712lp_build_context_init(&bld->setup_bld, gallivm, setup_type);713714/* For convenience */715bld->pos = bld->attribs[0];716bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];717718/* Position */719bld->mask[0] = TGSI_WRITEMASK_XYZW;720bld->interp[0] = LP_INTERP_LINEAR;721bld->interp_loc[0] = 0;722723/* Inputs */724for (attrib = 0; attrib < num_inputs; ++attrib) {725bld->mask[1 + attrib] = inputs[attrib].usage_mask;726bld->interp[1 + attrib] = inputs[attrib].interp;727bld->interp_loc[1 + attrib] = inputs[attrib].location;728}729bld->num_attribs = 1 + num_inputs;730731/* needed for indirect */732bld->a0_ptr = a0_ptr;733bld->dadx_ptr = dadx_ptr;734bld->dady_ptr = dady_ptr;735736/* Ensure all masked out input channels have a valid value */737for (attrib = 0; attrib < bld->num_attribs; ++attrib) {738for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {739bld->attribs[attrib][chan] = bld->coeff_bld.undef;740}741}742743if (pixel_center_integer) {744bld->pos_offset = 0.0;745} else {746bld->pos_offset = 0.5;747}748bld->depth_clamp = depth_clamp;749bld->coverage_samples = coverage_samples;750bld->num_loop = num_loop;751bld->sample_pos_array = sample_pos_array;752753pos_init(bld, x0, y0);754755/*756* Simple method (single step interpolation) may be slower if vector length757* is just 4, but the results are different (generally less accurate) with758* the other method, so always use more accurate version.759*/760{761/* XXX this should use a global static table */762unsigned i;763unsigned num_loops = 16 / type.length;764LLVMValueRef pixoffx, pixoffy, index;765LLVMValueRef ptr;766767bld->xoffset_store = lp_build_array_alloca(gallivm,768lp_build_vec_type(gallivm, type),769lp_build_const_int32(gallivm, num_loops),770"");771bld->yoffset_store = lp_build_array_alloca(gallivm,772lp_build_vec_type(gallivm, type),773lp_build_const_int32(gallivm, num_loops),774"");775for (i = 0; i < num_loops; i++) {776index = lp_build_const_int32(gallivm, i);777calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);778ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");779LLVMBuildStore(builder, pixoffx, ptr);780ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");781LLVMBuildStore(builder, pixoffy, ptr);782}783}784coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);785}786787788/*789* Advance the position and inputs to the given quad within the block.790*/791792void793lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,794struct gallivm_state *gallivm,795LLVMValueRef quad_start_index,796LLVMValueRef mask_store,797LLVMValueRef sample_id)798{799attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);800}801802void803lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,804struct gallivm_state *gallivm,805LLVMValueRef quad_start_index,806LLVMValueRef sample_id)807{808attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);809}810811812813