Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
4565 views
/**************************************************************************1*2* Copyright 2009 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627/**28* @file29* AoS pixel format manipulation.30*31* @author Jose Fonseca <[email protected]>32*/333435#include "util/format/u_format.h"36#include "util/u_memory.h"37#include "util/u_math.h"38#include "util/u_pointer.h"39#include "util/u_string.h"40#include "util/u_cpu_detect.h"4142#include "lp_bld_arit.h"43#include "lp_bld_init.h"44#include "lp_bld_type.h"45#include "lp_bld_flow.h"46#include "lp_bld_const.h"47#include "lp_bld_conv.h"48#include "lp_bld_swizzle.h"49#include "lp_bld_gather.h"50#include "lp_bld_debug.h"51#include "lp_bld_format.h"52#include "lp_bld_pack.h"53#include "lp_bld_intr.h"54#include "lp_bld_logic.h"55#include "lp_bld_bitarit.h"56#include "lp_bld_misc.h"5758/**59* Basic swizzling. Rearrange the order of the unswizzled array elements60* according to the format description. PIPE_SWIZZLE_0/ONE are supported61* too.62* Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.63*/64LLVMValueRef65lp_build_format_swizzle_aos(const struct util_format_description *desc,66struct lp_build_context *bld,67LLVMValueRef unswizzled)68{69unsigned char swizzles[4];70unsigned chan;7172assert(bld->type.length % 4 == 0);7374for (chan = 0; chan < 4; ++chan) {75enum pipe_swizzle swizzle;7677if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {78/*79* For ZS formats do RGBA = ZZZ180*/81if (chan == 3) {82swizzle = PIPE_SWIZZLE_1;83} else if (desc->swizzle[0] == PIPE_SWIZZLE_NONE) {84swizzle = PIPE_SWIZZLE_0;85} else {86swizzle = desc->swizzle[0];87}88} else {89swizzle = desc->swizzle[chan];90}91swizzles[chan] = swizzle;92}9394return lp_build_swizzle_aos(bld, unswizzled, swizzles);95}969798/**99* Whether the format matches the vector type, apart of swizzles.100*/101static inline boolean102format_matches_type(const struct util_format_description *desc,103struct lp_type type)104{105enum util_format_type chan_type;106unsigned chan;107108assert(type.length % 4 == 0);109110if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||111desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||112desc->block.width != 1 ||113desc->block.height != 1) {114return FALSE;115}116117if (type.floating) {118chan_type = UTIL_FORMAT_TYPE_FLOAT;119} else if (type.fixed) {120chan_type = UTIL_FORMAT_TYPE_FIXED;121} else if (type.sign) {122chan_type = UTIL_FORMAT_TYPE_SIGNED;123} else {124chan_type = UTIL_FORMAT_TYPE_UNSIGNED;125}126127for (chan = 0; chan < desc->nr_channels; ++chan) {128if (desc->channel[chan].size != type.width) {129return FALSE;130}131132if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {133if (desc->channel[chan].type != chan_type ||134desc->channel[chan].normalized != type.norm) {135return FALSE;136}137}138}139140return TRUE;141}142143/*144* Do rounding when converting small unorm values to larger ones.145* Not quite 100% accurate, as it's done by appending MSBs, but146* should be good enough.147*/148149static inline LLVMValueRef150scale_bits_up(struct gallivm_state *gallivm,151int src_bits,152int dst_bits,153LLVMValueRef src,154struct lp_type src_type)155{156LLVMBuilderRef builder = gallivm->builder;157LLVMValueRef result = src;158159if (src_bits == 1 && dst_bits > 1) {160/*161* Useful for a1 - we'd need quite some repeated copies otherwise.162*/163struct lp_build_context bld;164LLVMValueRef dst_mask;165lp_build_context_init(&bld, gallivm, src_type);166dst_mask = lp_build_const_int_vec(gallivm, src_type,167(1 << dst_bits) - 1),168result = lp_build_cmp(&bld, PIPE_FUNC_EQUAL, src,169lp_build_const_int_vec(gallivm, src_type, 0));170result = lp_build_andnot(&bld, dst_mask, result);171}172else if (dst_bits > src_bits) {173/* Scale up bits */174int db = dst_bits - src_bits;175176/* Shift left by difference in bits */177result = LLVMBuildShl(builder,178src,179lp_build_const_int_vec(gallivm, src_type, db),180"");181182if (db <= src_bits) {183/* Enough bits in src to fill the remainder */184LLVMValueRef lower = LLVMBuildLShr(builder,185src,186lp_build_const_int_vec(gallivm, src_type,187src_bits - db),188"");189190result = LLVMBuildOr(builder, result, lower, "");191} else if (db > src_bits) {192/* Need to repeatedly copy src bits to fill remainder in dst */193unsigned n;194195for (n = src_bits; n < dst_bits; n *= 2) {196LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n);197198result = LLVMBuildOr(builder,199result,200LLVMBuildLShr(builder, result, shuv, ""),201"");202}203}204} else {205assert (dst_bits == src_bits);206}207208return result;209}210211/**212* Unpack a single pixel into its XYZW components.213*214* @param desc the pixel format for the packed pixel value215* @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM216*217* @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.218*/219static inline LLVMValueRef220lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,221const struct util_format_description *desc,222LLVMValueRef packed)223{224LLVMBuilderRef builder = gallivm->builder;225LLVMValueRef shifted, casted, scaled, masked;226LLVMValueRef shifts[4];227LLVMValueRef masks[4];228LLVMValueRef scales[4];229LLVMTypeRef vec32_type;230231boolean normalized;232boolean needs_uitofp;233unsigned i;234235/* TODO: Support more formats */236assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);237assert(desc->block.width == 1);238assert(desc->block.height == 1);239assert(desc->block.bits <= 32);240241/* Do the intermediate integer computations with 32bit integers since it242* matches floating point size */243assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));244245vec32_type = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);246247/* Broadcast the packed value to all four channels248* before: packed = BGRA249* after: packed = {BGRA, BGRA, BGRA, BGRA}250*/251packed = LLVMBuildInsertElement(builder, LLVMGetUndef(vec32_type), packed,252LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),253"");254packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(vec32_type),255LLVMConstNull(vec32_type),256"");257258/* Initialize vector constants */259normalized = FALSE;260needs_uitofp = FALSE;261262/* Loop over 4 color components */263for (i = 0; i < 4; ++i) {264unsigned bits = desc->channel[i].size;265unsigned shift = desc->channel[i].shift;266267if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {268shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));269masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));270scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));271}272else {273unsigned long long mask = (1ULL << bits) - 1;274275assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);276277if (bits == 32) {278needs_uitofp = TRUE;279}280281shifts[i] = lp_build_const_int32(gallivm, shift);282masks[i] = lp_build_const_int32(gallivm, mask);283284if (desc->channel[i].normalized) {285scales[i] = lp_build_const_float(gallivm, 1.0 / mask);286normalized = TRUE;287}288else289scales[i] = lp_build_const_float(gallivm, 1.0);290}291}292293/* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}294* into masked = {X, Y, Z, W}295*/296if (desc->block.bits < 32 && normalized) {297/*298* Note: we cannot do the shift below on x86 natively until AVX2.299*300* Old llvm versions will resort to scalar extract/shift insert,301* which is definitely terrible, new versions will just do302* several vector shifts and shuffle/blend results together.303* We could turn this into a variable left shift plus a constant304* right shift, and llvm would then turn the variable left shift305* into a mul for us (albeit without sse41 the mul needs emulation306* too...). However, since we're going to do a float mul307* anyway, we just adjust that mul instead (plus the mask), skipping308* the shift completely.309* We could also use a extra mul when the format isn't normalized and310* we don't have AVX2 support, but don't bother for now. Unfortunately,311* this strategy doesn't work for 32bit formats (such as rgb10a2 or even312* rgba8 if it ends up here), as that would require UIToFP, albeit that313* would be fixable with easy 16bit shuffle (unless there's channels314* crossing 16bit boundaries).315*/316for (i = 0; i < 4; ++i) {317if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {318unsigned bits = desc->channel[i].size;319unsigned shift = desc->channel[i].shift;320unsigned long long mask = ((1ULL << bits) - 1) << shift;321scales[i] = lp_build_const_float(gallivm, 1.0 / mask);322masks[i] = lp_build_const_int32(gallivm, mask);323}324}325masked = LLVMBuildAnd(builder, packed, LLVMConstVector(masks, 4), "");326} else {327shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");328masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");329}330331if (!needs_uitofp) {332/* UIToFP can't be expressed in SSE2 */333casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");334} else {335casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");336}337338/*339* At this point 'casted' may be a vector of floats such as340* {255.0, 255.0, 255.0, 255.0}. (Normalized values may be multiplied341* by powers of two). Next, if the pixel values are normalized342* we'll scale this to {1.0, 1.0, 1.0, 1.0}.343*/344345if (normalized)346scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");347else348scaled = casted;349350return scaled;351}352353354/**355* Pack a single pixel.356*357* @param rgba 4 float vector with the unpacked components.358*359* XXX: This is mostly for reference and testing -- operating a single pixel at360* a time is rarely if ever needed.361*/362LLVMValueRef363lp_build_pack_rgba_aos(struct gallivm_state *gallivm,364const struct util_format_description *desc,365LLVMValueRef rgba)366{367LLVMBuilderRef builder = gallivm->builder;368LLVMTypeRef type;369LLVMValueRef packed = NULL;370LLVMValueRef swizzles[4];371LLVMValueRef shifted, casted, scaled, unswizzled;372LLVMValueRef shifts[4];373LLVMValueRef scales[4];374boolean normalized;375unsigned i, j;376377assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);378assert(desc->block.width == 1);379assert(desc->block.height == 1);380381type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);382383/* Unswizzle the color components into the source vector. */384for (i = 0; i < 4; ++i) {385for (j = 0; j < 4; ++j) {386if (desc->swizzle[j] == i)387break;388}389if (j < 4)390swizzles[i] = lp_build_const_int32(gallivm, j);391else392swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));393}394395unswizzled = LLVMBuildShuffleVector(builder, rgba,396LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),397LLVMConstVector(swizzles, 4), "");398399normalized = FALSE;400for (i = 0; i < 4; ++i) {401unsigned bits = desc->channel[i].size;402unsigned shift = desc->channel[i].shift;403404if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {405shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));406scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));407}408else {409unsigned mask = (1 << bits) - 1;410411assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);412assert(bits < 32);413414shifts[i] = lp_build_const_int32(gallivm, shift);415416if (desc->channel[i].normalized) {417scales[i] = lp_build_const_float(gallivm, mask);418normalized = TRUE;419}420else421scales[i] = lp_build_const_float(gallivm, 1.0);422}423}424425if (normalized)426scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");427else428scaled = unswizzled;429430casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");431432shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");433434/* Bitwise or all components */435for (i = 0; i < 4; ++i) {436if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {437LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,438lp_build_const_int32(gallivm, i), "");439if (packed)440packed = LLVMBuildOr(builder, packed, component, "");441else442packed = component;443}444}445446if (!packed)447packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));448449if (desc->block.bits < 32)450packed = LLVMBuildTrunc(builder, packed, type, "");451452return packed;453}454455456457458/**459* Fetch a pixel into a 4 float AoS.460*461* \param format_desc describes format of the image we're fetching from462* \param aligned whether the data is guaranteed to be aligned463* \param ptr address of the pixel block (or the texel if uncompressed)464* \param i, j the sub-block pixel coordinates. For non-compressed formats465* these will always be (0, 0).466* \param cache optional value pointing to a lp_build_format_cache structure467* \return a 4 element vector with the pixel's RGBA values.468*/469LLVMValueRef470lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,471const struct util_format_description *format_desc,472struct lp_type type,473boolean aligned,474LLVMValueRef base_ptr,475LLVMValueRef offset,476LLVMValueRef i,477LLVMValueRef j,478LLVMValueRef cache)479{480const struct util_format_unpack_description *unpack =481util_format_unpack_description(format_desc->format);482LLVMBuilderRef builder = gallivm->builder;483unsigned num_pixels = type.length / 4;484struct lp_build_context bld;485486assert(type.length <= LP_MAX_VECTOR_LENGTH);487assert(type.length % 4 == 0);488489lp_build_context_init(&bld, gallivm, type);490491/*492* Trivial case493*494* The format matches the type (apart of a swizzle) so no need for495* scaling or converting.496*/497498if (format_matches_type(format_desc, type) &&499format_desc->block.bits <= type.width * 4 &&500/* XXX this shouldn't be needed */501util_is_power_of_two_or_zero(format_desc->block.bits)) {502LLVMValueRef packed;503LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);504struct lp_type fetch_type;505unsigned vec_len = type.width * type.length;506507/*508* The format matches the type (apart of a swizzle) so no need for509* scaling or converting.510*/511512fetch_type = lp_type_uint(type.width*4);513packed = lp_build_gather(gallivm, type.length/4,514format_desc->block.bits, fetch_type,515aligned, base_ptr, offset, TRUE);516517assert(format_desc->block.bits <= vec_len);518(void) vec_len; /* silence unused var warning for non-debug build */519520packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");521return lp_build_format_swizzle_aos(format_desc, &bld, packed);522}523524/*525* Bit arithmetic for converting small_unorm to unorm8.526*527* This misses some opportunities for optimizations (like skipping mask528* for the highest channel for instance, or doing bit scaling in parallel529* for channels with the same bit width) but it should be passable for530* all arithmetic formats.531*/532if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&533format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&534util_format_fits_8unorm(format_desc) &&535type.width == 8 && type.norm == 1 && type.sign == 0 &&536type.fixed == 0 && type.floating == 0) {537LLVMValueRef packed, res = NULL, chans[4], rgba[4];538LLVMTypeRef dst_vec_type, conv_vec_type;539struct lp_type fetch_type, conv_type;540struct lp_build_context bld_conv;541unsigned j;542543fetch_type = lp_type_uint(type.width*4);544conv_type = lp_type_int_vec(type.width*4, type.width * type.length);545dst_vec_type = lp_build_vec_type(gallivm, type);546conv_vec_type = lp_build_vec_type(gallivm, conv_type);547lp_build_context_init(&bld_conv, gallivm, conv_type);548549packed = lp_build_gather(gallivm, type.length/4,550format_desc->block.bits, fetch_type,551aligned, base_ptr, offset, TRUE);552553assert(format_desc->block.bits * type.length / 4 <=554type.width * type.length);555556packed = LLVMBuildBitCast(gallivm->builder, packed, conv_vec_type, "");557558for (j = 0; j < format_desc->nr_channels; ++j) {559unsigned mask = 0;560unsigned sa = format_desc->channel[j].shift;561562mask = (1 << format_desc->channel[j].size) - 1;563564/* Extract bits from source */565chans[j] = LLVMBuildLShr(builder, packed,566lp_build_const_int_vec(gallivm, conv_type, sa),567"");568569chans[j] = LLVMBuildAnd(builder, chans[j],570lp_build_const_int_vec(gallivm, conv_type, mask),571"");572573/* Scale bits */574if (type.norm) {575chans[j] = scale_bits_up(gallivm, format_desc->channel[j].size,576type.width, chans[j], conv_type);577}578}579/*580* This is a hacked lp_build_format_swizzle_soa() since we need a581* normalized 1 but only 8 bits in a 32bit vector...582*/583for (j = 0; j < 4; ++j) {584enum pipe_swizzle swizzle = format_desc->swizzle[j];585if (swizzle == PIPE_SWIZZLE_1) {586rgba[j] = lp_build_const_int_vec(gallivm, conv_type, (1 << type.width) - 1);587} else {588rgba[j] = lp_build_swizzle_soa_channel(&bld_conv, chans, swizzle);589}590if (j == 0) {591res = rgba[j];592} else {593rgba[j] = LLVMBuildShl(builder, rgba[j],594lp_build_const_int_vec(gallivm, conv_type,595j * type.width), "");596res = LLVMBuildOr(builder, res, rgba[j], "");597}598}599res = LLVMBuildBitCast(gallivm->builder, res, dst_vec_type, "");600601return res;602}603604/*605* Bit arithmetic606*/607608if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&609(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||610format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&611format_desc->block.width == 1 &&612format_desc->block.height == 1 &&613/* XXX this shouldn't be needed */614util_is_power_of_two_or_zero(format_desc->block.bits) &&615format_desc->block.bits <= 32 &&616format_desc->is_bitmask &&617!format_desc->is_mixed &&618(format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||619format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&620!format_desc->channel[0].pure_integer) {621622LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];623LLVMValueRef res[LP_MAX_VECTOR_WIDTH / 128];624struct lp_type conv_type;625unsigned k, num_conv_src, num_conv_dst;626627/*628* Note this path is generally terrible for fetching multiple pixels.629* We should make sure we cannot hit this code path for anything but630* single pixels.631*/632633/*634* Unpack a pixel at a time into a <4 x float> RGBA vector635*/636637for (k = 0; k < num_pixels; ++k) {638LLVMValueRef packed;639640packed = lp_build_gather_elem(gallivm, num_pixels,641format_desc->block.bits, 32, aligned,642base_ptr, offset, k, FALSE);643644tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,645format_desc,646packed);647}648649/*650* Type conversion.651*652* TODO: We could avoid floating conversion for integer to653* integer conversions.654*/655656if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {657debug_printf("%s: unpacking %s with floating point\n",658__FUNCTION__, format_desc->short_name);659}660661conv_type = lp_float32_vec4_type();662num_conv_src = num_pixels;663num_conv_dst = 1;664665if (num_pixels % 8 == 0) {666lp_build_concat_n(gallivm, lp_float32_vec4_type(),667tmps, num_pixels, tmps, num_pixels / 2);668conv_type.length *= num_pixels / 4;669num_conv_src = 4 * num_pixels / 8;670if (type.width == 8 && type.floating == 0 && type.fixed == 0) {671/*672* FIXME: The fast float->unorm path (which is basically673* skipping the MIN/MAX which are extremely pointless in any674* case) requires that there's 2 destinations...675* In any case, we really should make sure we don't hit this676* code with multiple pixels for unorm8 dst types, it's677* completely hopeless even if we do hit the right conversion.678*/679type.length /= num_pixels / 4;680num_conv_dst = num_pixels / 4;681}682}683684lp_build_conv(gallivm, conv_type, type,685tmps, num_conv_src, res, num_conv_dst);686687if (num_pixels % 8 == 0 &&688(type.width == 8 && type.floating == 0 && type.fixed == 0)) {689lp_build_concat_n(gallivm, type, res, num_conv_dst, res, 1);690}691692return lp_build_format_swizzle_aos(format_desc, &bld, res[0]);693}694695/* If all channels are of same type and we are not using half-floats */696if (format_desc->is_array &&697format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {698assert(!format_desc->is_mixed);699return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);700}701702/*703* YUV / subsampled formats704*/705706if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {707struct lp_type tmp_type;708LLVMValueRef tmp;709710memset(&tmp_type, 0, sizeof tmp_type);711tmp_type.width = 8;712tmp_type.length = num_pixels * 4;713tmp_type.norm = TRUE;714715tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,716format_desc,717num_pixels,718base_ptr,719offset,720i, j);721722lp_build_conv(gallivm,723tmp_type, type,724&tmp, 1, &tmp, 1);725726return tmp;727}728729/*730* s3tc rgb formats731*/732733if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {734struct lp_type tmp_type;735LLVMValueRef tmp;736737memset(&tmp_type, 0, sizeof tmp_type);738tmp_type.width = 8;739tmp_type.length = num_pixels * 4;740tmp_type.norm = TRUE;741742tmp = lp_build_fetch_s3tc_rgba_aos(gallivm,743format_desc,744num_pixels,745base_ptr,746offset,747i, j,748cache);749750lp_build_conv(gallivm,751tmp_type, type,752&tmp, 1, &tmp, 1);753754return tmp;755}756757/*758* rgtc rgb formats759*/760761if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {762struct lp_type tmp_type;763LLVMValueRef tmp;764765memset(&tmp_type, 0, sizeof tmp_type);766tmp_type.width = 8;767tmp_type.length = num_pixels * 4;768tmp_type.norm = TRUE;769tmp_type.sign = (format_desc->format == PIPE_FORMAT_RGTC1_SNORM ||770format_desc->format == PIPE_FORMAT_RGTC2_SNORM ||771format_desc->format == PIPE_FORMAT_LATC1_SNORM ||772format_desc->format == PIPE_FORMAT_LATC2_SNORM);773774tmp = lp_build_fetch_rgtc_rgba_aos(gallivm,775format_desc,776num_pixels,777base_ptr,778offset,779i, j,780cache);781782lp_build_conv(gallivm,783tmp_type, type,784&tmp, 1, &tmp, 1);785786return tmp;787}788789/*790* Fallback to util_format_description::fetch_rgba_8unorm().791*/792793if (unpack->fetch_rgba_8unorm &&794!type.floating && type.width == 8 && !type.sign && type.norm) {795/*796* Fallback to calling util_format_description::fetch_rgba_8unorm.797*798* This is definitely not the most efficient way of fetching pixels, as799* we miss the opportunity to do vectorization, but this it is a800* convenient for formats or scenarios for which there was no opportunity801* or incentive to optimize.802*/803804LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);805LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);806LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);807LLVMValueRef function;808LLVMValueRef tmp_ptr;809LLVMValueRef tmp;810LLVMValueRef res;811unsigned k;812813if (gallivm_debug & GALLIVM_DEBUG_PERF) {814debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",815__FUNCTION__, format_desc->short_name);816}817818/*819* Declare and bind format_desc->fetch_rgba_8unorm().820*/821822{823/*824* Function to call looks like:825* fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)826*/827LLVMTypeRef ret_type;828LLVMTypeRef arg_types[4];829LLVMTypeRef function_type;830831ret_type = LLVMVoidTypeInContext(gallivm->context);832arg_types[0] = pi8t;833arg_types[1] = pi8t;834arg_types[2] = i32t;835arg_types[3] = i32t;836function_type = LLVMFunctionType(ret_type, arg_types,837ARRAY_SIZE(arg_types), 0);838839if (gallivm->cache)840gallivm->cache->dont_cache = true;841/* make const pointer for the C fetch_rgba_8unorm function */842function = lp_build_const_int_pointer(gallivm,843func_to_pointer((func_pointer) unpack->fetch_rgba_8unorm));844845/* cast the callee pointer to the function's type */846function = LLVMBuildBitCast(builder, function,847LLVMPointerType(function_type, 0),848"cast callee");849}850851tmp_ptr = lp_build_alloca(gallivm, i32t, "");852853res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));854855/*856* Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result857* in the SoA vectors.858*/859860for (k = 0; k < num_pixels; ++k) {861LLVMValueRef index = lp_build_const_int32(gallivm, k);862LLVMValueRef args[4];863864args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");865args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,866base_ptr, offset, k);867868if (num_pixels == 1) {869args[2] = i;870args[3] = j;871}872else {873args[2] = LLVMBuildExtractElement(builder, i, index, "");874args[3] = LLVMBuildExtractElement(builder, j, index, "");875}876877LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");878879tmp = LLVMBuildLoad(builder, tmp_ptr, "");880881if (num_pixels == 1) {882res = tmp;883}884else {885res = LLVMBuildInsertElement(builder, res, tmp, index, "");886}887}888889/* Bitcast from <n x i32> to <4n x i8> */890res = LLVMBuildBitCast(builder, res, bld.vec_type, "");891892return res;893}894895/*896* Fallback to fetch_rgba().897*/898899util_format_fetch_rgba_func_ptr fetch_rgba =900util_format_fetch_rgba_func(format_desc->format);901if (fetch_rgba) {902/*903* Fallback to calling util_format_description::fetch_rgba_float.904*905* This is definitely not the most efficient way of fetching pixels, as906* we miss the opportunity to do vectorization, but this it is a907* convenient for formats or scenarios for which there was no opportunity908* or incentive to optimize.909*/910911LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);912LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);913LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);914LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);915LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);916LLVMValueRef function;917LLVMValueRef tmp_ptr;918LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];919LLVMValueRef res;920unsigned k;921922if (gallivm_debug & GALLIVM_DEBUG_PERF) {923debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",924__FUNCTION__, format_desc->short_name);925}926927/*928* Declare and bind unpack->fetch_rgba_float().929*/930931{932/*933* Function to call looks like:934* fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)935*/936LLVMTypeRef ret_type;937LLVMTypeRef arg_types[4];938939ret_type = LLVMVoidTypeInContext(gallivm->context);940arg_types[0] = pf32t;941arg_types[1] = pi8t;942arg_types[2] = i32t;943arg_types[3] = i32t;944945if (gallivm->cache)946gallivm->cache->dont_cache = true;947function = lp_build_const_func_pointer(gallivm,948func_to_pointer((func_pointer) fetch_rgba),949ret_type,950arg_types, ARRAY_SIZE(arg_types),951format_desc->short_name);952}953954tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");955956/*957* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result958* in the SoA vectors.959*/960961for (k = 0; k < num_pixels; ++k) {962LLVMValueRef args[4];963964args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");965args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,966base_ptr, offset, k);967968if (num_pixels == 1) {969args[2] = i;970args[3] = j;971}972else {973LLVMValueRef index = lp_build_const_int32(gallivm, k);974args[2] = LLVMBuildExtractElement(builder, i, index, "");975args[3] = LLVMBuildExtractElement(builder, j, index, "");976}977978LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");979980tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");981}982983lp_build_conv(gallivm,984lp_float32_vec4_type(),985type,986tmps, num_pixels, &res, 1);987988return res;989}990991assert(!util_format_is_pure_integer(format_desc->format));992993assert(0);994return lp_build_undef(gallivm, type);995}996997998