Path: blob/21.2-virgl/src/gallium/drivers/llvmpipe/lp_bld_blend.c
4570 views
/**************************************************************************1*2* Copyright 2012 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "pipe/p_state.h"28#include "util/u_debug.h"2930#include "gallivm/lp_bld_type.h"31#include "gallivm/lp_bld_arit.h"32#include "gallivm/lp_bld_const.h"33#include "gallivm/lp_bld_logic.h"34#include "gallivm/lp_bld_swizzle.h"35#include "gallivm/lp_bld_flow.h"36#include "gallivm/lp_bld_debug.h"37#include "gallivm/lp_bld_pack.h"3839#include "lp_bld_blend.h"4041/**42* Is (a OP b) == (b OP a)?43*/44boolean45lp_build_blend_func_commutative(unsigned func)46{47switch (func) {48case PIPE_BLEND_ADD:49case PIPE_BLEND_MIN:50case PIPE_BLEND_MAX:51return TRUE;52case PIPE_BLEND_SUBTRACT:53case PIPE_BLEND_REVERSE_SUBTRACT:54return FALSE;55default:56assert(0);57return TRUE;58}59}606162/**63* Whether the blending functions are the reverse of each other.64*/65boolean66lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)67{68if (rgb_func == alpha_func)69return FALSE;70if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)71return TRUE;72if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)73return TRUE;74return FALSE;75}767778/**79* Whether the blending factors are complementary of each other.80*/81static inline boolean82lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)83{84STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE);85STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) ==86PIPE_BLENDFACTOR_INV_CONST_COLOR);87return dst_factor == (src_factor ^ 0x10);88}899091/**92* Whether this is a inverse blend factor93*/94static inline boolean95is_inverse_factor(unsigned factor)96{97STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11);98return factor > 0x11;99}100101102/**103* Calculates the (expanded to wider type) multiplication104* of 2 normalized numbers.105*/106static void107lp_build_mul_norm_expand(struct lp_build_context *bld,108LLVMValueRef a, LLVMValueRef b,109LLVMValueRef *resl, LLVMValueRef *resh,110boolean signedness_differs)111{112const struct lp_type type = bld->type;113struct lp_type wide_type = lp_wider_type(type);114struct lp_type wide_type2 = wide_type;115struct lp_type type2 = type;116LLVMValueRef al, ah, bl, bh;117118assert(lp_check_value(type, a));119assert(lp_check_value(type, b));120assert(!type.floating && !type.fixed && type.norm);121122if (a == bld->zero || b == bld->zero) {123LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type));124*resl = zero;125*resh = zero;126return;127}128129if (signedness_differs) {130type2.sign = !type.sign;131wide_type2.sign = !wide_type2.sign;132}133134lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);135lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh);136137*resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);138*resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);139}140141142/**143* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml144*/145LLVMValueRef146lp_build_blend_func(struct lp_build_context *bld,147unsigned func,148LLVMValueRef term1,149LLVMValueRef term2)150{151switch (func) {152case PIPE_BLEND_ADD:153return lp_build_add(bld, term1, term2);154case PIPE_BLEND_SUBTRACT:155return lp_build_sub(bld, term1, term2);156case PIPE_BLEND_REVERSE_SUBTRACT:157return lp_build_sub(bld, term2, term1);158case PIPE_BLEND_MIN:159return lp_build_min(bld, term1, term2);160case PIPE_BLEND_MAX:161return lp_build_max(bld, term1, term2);162default:163assert(0);164return bld->zero;165}166}167168169/**170* Performs optimizations and blending independent of SoA/AoS171*172* @param func the blend function173* @param factor_src PIPE_BLENDFACTOR_xxx174* @param factor_dst PIPE_BLENDFACTOR_xxx175* @param src source rgba176* @param dst dest rgba177* @param src_factor src factor computed value178* @param dst_factor dst factor computed value179* @param not_alpha_dependent same factors accross all channels of src/dst180*181* not_alpha_dependent should be:182* SoA: always true as it is only one channel at a time183* AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor184*185* Note that pretty much every possible optimisation can only be done on non-unorm targets186* due to unorm values not going above 1.0 meaning factorisation can change results.187* e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.188*/189LLVMValueRef190lp_build_blend(struct lp_build_context *bld,191unsigned func,192unsigned factor_src,193unsigned factor_dst,194LLVMValueRef src,195LLVMValueRef dst,196LLVMValueRef src_factor,197LLVMValueRef dst_factor,198boolean not_alpha_dependent,199boolean optimise_only)200{201LLVMValueRef result, src_term, dst_term;202203/* If we are not alpha dependent we can mess with the src/dst factors */204if (not_alpha_dependent) {205if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {206if (func == PIPE_BLEND_ADD) {207if (factor_src < factor_dst) {208return lp_build_lerp(bld, src_factor, dst, src, 0);209} else {210return lp_build_lerp(bld, dst_factor, src, dst, 0);211}212} else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) {213result = lp_build_add(bld, src, dst);214215if (factor_src < factor_dst) {216result = lp_build_mul(bld, result, src_factor);217return lp_build_sub(bld, result, dst);218} else {219result = lp_build_mul(bld, result, dst_factor);220return lp_build_sub(bld, src, result);221}222} else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {223result = lp_build_add(bld, src, dst);224225if (factor_src < factor_dst) {226result = lp_build_mul(bld, result, src_factor);227return lp_build_sub(bld, dst, result);228} else {229result = lp_build_mul(bld, result, dst_factor);230return lp_build_sub(bld, result, src);231}232}233}234235if (bld->type.floating && factor_src == factor_dst) {236if (func == PIPE_BLEND_ADD ||237func == PIPE_BLEND_SUBTRACT ||238func == PIPE_BLEND_REVERSE_SUBTRACT) {239LLVMValueRef result;240result = lp_build_blend_func(bld, func, src, dst);241return lp_build_mul(bld, result, src_factor);242}243}244}245246if (optimise_only)247return NULL;248249if ((bld->type.norm && bld->type.sign) &&250(is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) {251/*252* With snorm blending, the inverse blend factors range from [0,2]253* instead of [-1,1], so the ordinary signed normalized arithmetic254* doesn't quite work. Unpack must be unsigned, and the add/sub255* must be done with wider type.256* (Note that it's not quite obvious what the blend equation wrt to257* clamping should actually be based on GL spec in this case, but258* really the incoming src values are clamped to [-1,1] (the dst is259* always clamped already), and then NO further clamping occurs until260* the end.)261*/262struct lp_build_context bldw;263struct lp_type wide_type = lp_wider_type(bld->type);264LLVMValueRef src_terml, src_termh, dst_terml, dst_termh;265LLVMValueRef resl, resh;266267/*268* We don't need saturate math for the sub/add, since we have269* x+1 bit numbers in x*2 wide type (result is x+2 bits).270* (Doesn't really matter on x86 sse2 though as we use saturated271* intrinsics.)272*/273wide_type.norm = 0;274lp_build_context_init(&bldw, bld->gallivm, wide_type);275276/*277* XXX This is a bit hackish. Note that -128 really should278* be -1.0, the same as -127. However, we did not actually clamp279* things anywhere (relying on pack intrinsics instead) therefore280* we will get -128, and the inverted factor then 255. But the mul281* can overflow in this case (rather the rounding fixups for the mul,282* -128*255 will be positive).283* So we clamp the src and dst up here but only when necessary (we284* should do this before calculating blend factors but it's enough285* for avoiding overflow).286*/287if (is_inverse_factor(factor_src)) {288src = lp_build_max(bld, src,289lp_build_const_vec(bld->gallivm, bld->type, -1.0));290}291if (is_inverse_factor(factor_dst)) {292dst = lp_build_max(bld, dst,293lp_build_const_vec(bld->gallivm, bld->type, -1.0));294}295296lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh,297is_inverse_factor(factor_src) ? TRUE : FALSE);298lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh,299is_inverse_factor(factor_dst) ? TRUE : FALSE);300resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml);301resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh);302303/*304* XXX pack2_native is not ok because the values have to be in dst305* range. We need native pack though for the correct order on avx2.306* Will break on everything not implementing clamping pack intrinsics307* (i.e. everything but sse2 and altivec).308*/309return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh);310} else {311src_term = lp_build_mul(bld, src, src_factor);312dst_term = lp_build_mul(bld, dst, dst_factor);313return lp_build_blend_func(bld, func, src_term, dst_term);314}315}316317void318lp_build_alpha_to_coverage(struct gallivm_state *gallivm,319struct lp_type type,320struct lp_build_mask_context *mask,321LLVMValueRef alpha,322boolean do_branch)323{324struct lp_build_context bld;325LLVMValueRef test;326LLVMValueRef alpha_ref_value;327328lp_build_context_init(&bld, gallivm, type);329330alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);331332test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);333334lp_build_name(test, "alpha_to_coverage");335336lp_build_mask_update(mask, test);337338if (do_branch)339lp_build_mask_check(mask);340}341342343