Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
4565 views
/**************************************************************************1*2* Copyright 2010 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL16* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,17* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR18* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE19* USE OR OTHER DEALINGS IN THE SOFTWARE.20*21* The above copyright notice and this permission notice (including the22* next paragraph) shall be included in all copies or substantial portions23* of the Software.24*25**************************************************************************/262728/**29* @file30* YUV pixel format manipulation.31*32* @author Jose Fonseca <[email protected]>33*/343536#include "util/format/u_format.h"37#include "util/u_cpu_detect.h"3839#include "lp_bld_arit.h"40#include "lp_bld_type.h"41#include "lp_bld_const.h"42#include "lp_bld_conv.h"43#include "lp_bld_gather.h"44#include "lp_bld_format.h"45#include "lp_bld_init.h"46#include "lp_bld_logic.h"4748/**49* Extract Y, U, V channels from packed UYVY.50* @param packed is a <n x i32> vector with the packed UYVY blocks51* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)52*/53static void54uyvy_to_yuv_soa(struct gallivm_state *gallivm,55unsigned n,56LLVMValueRef packed,57LLVMValueRef i,58LLVMValueRef *y,59LLVMValueRef *u,60LLVMValueRef *v)61{62LLVMBuilderRef builder = gallivm->builder;63struct lp_type type;64LLVMValueRef mask;6566memset(&type, 0, sizeof type);67type.width = 32;68type.length = n;6970assert(lp_check_value(type, packed));71assert(lp_check_value(type, i));7273/*74* Little endian:75* y = (uyvy >> (16*i + 8)) & 0xff76* u = (uyvy ) & 0xff77* v = (uyvy >> 16 ) & 0xff78*79* Big endian:80* y = (uyvy >> (-16*i + 16)) & 0xff81* u = (uyvy >> 24) & 0xff82* v = (uyvy >> 8) & 0xff83*/8485#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)86/*87* Avoid shift with per-element count.88* No support on x86, gets translated to roughly 5 instructions89* per element. Didn't measure performance but cuts shader size90* by quite a bit (less difference if cpu has no sse4.1 support).91*/92if (util_get_cpu_caps()->has_sse2 && n > 1) {93LLVMValueRef sel, tmp, tmp2;94struct lp_build_context bld32;9596lp_build_context_init(&bld32, gallivm, type);9798tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");99tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");100sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));101*y = lp_build_select(&bld32, sel, tmp, tmp2);102} else103#endif104{105LLVMValueRef shift;106#if UTIL_ARCH_LITTLE_ENDIAN107shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");108shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");109#else110shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");111shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");112#endif113*y = LLVMBuildLShr(builder, packed, shift, "");114}115116#if UTIL_ARCH_LITTLE_ENDIAN117*u = packed;118*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");119#else120*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");121*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");122#endif123124mask = lp_build_const_int_vec(gallivm, type, 0xff);125126*y = LLVMBuildAnd(builder, *y, mask, "y");127*u = LLVMBuildAnd(builder, *u, mask, "u");128*v = LLVMBuildAnd(builder, *v, mask, "v");129}130131132/**133* Extract Y, U, V channels from packed YUYV.134* @param packed is a <n x i32> vector with the packed YUYV blocks135* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)136*/137static void138yuyv_to_yuv_soa(struct gallivm_state *gallivm,139unsigned n,140LLVMValueRef packed,141LLVMValueRef i,142LLVMValueRef *y,143LLVMValueRef *u,144LLVMValueRef *v)145{146LLVMBuilderRef builder = gallivm->builder;147struct lp_type type;148LLVMValueRef mask;149150memset(&type, 0, sizeof type);151type.width = 32;152type.length = n;153154assert(lp_check_value(type, packed));155assert(lp_check_value(type, i));156157/*158* Little endian:159* y = (yuyv >> 16*i) & 0xff160* u = (yuyv >> 8 ) & 0xff161* v = (yuyv >> 24 ) & 0xff162*163* Big endian:164* y = (yuyv >> (-16*i + 24) & 0xff165* u = (yuyv >> 16) & 0xff166* v = (yuyv) & 0xff167*/168169#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)170/*171* Avoid shift with per-element count.172* No support on x86, gets translated to roughly 5 instructions173* per element. Didn't measure performance but cuts shader size174* by quite a bit (less difference if cpu has no sse4.1 support).175*/176if (util_get_cpu_caps()->has_sse2 && n > 1) {177LLVMValueRef sel, tmp;178struct lp_build_context bld32;179180lp_build_context_init(&bld32, gallivm, type);181182tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");183sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));184*y = lp_build_select(&bld32, sel, packed, tmp);185} else186#endif187{188LLVMValueRef shift;189#if UTIL_ARCH_LITTLE_ENDIAN190shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");191#else192shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");193shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");194#endif195*y = LLVMBuildLShr(builder, packed, shift, "");196}197198#if UTIL_ARCH_LITTLE_ENDIAN199*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");200*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");201#else202*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");203*v = packed;204#endif205206mask = lp_build_const_int_vec(gallivm, type, 0xff);207208*y = LLVMBuildAnd(builder, *y, mask, "y");209*u = LLVMBuildAnd(builder, *u, mask, "u");210*v = LLVMBuildAnd(builder, *v, mask, "v");211}212213214static inline void215yuv_to_rgb_soa(struct gallivm_state *gallivm,216unsigned n,217LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,218LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)219{220LLVMBuilderRef builder = gallivm->builder;221struct lp_type type;222struct lp_build_context bld;223224LLVMValueRef c0;225LLVMValueRef c8;226LLVMValueRef c16;227LLVMValueRef c128;228LLVMValueRef c255;229230LLVMValueRef cy;231LLVMValueRef cug;232LLVMValueRef cub;233LLVMValueRef cvr;234LLVMValueRef cvg;235236memset(&type, 0, sizeof type);237type.sign = TRUE;238type.width = 32;239type.length = n;240241lp_build_context_init(&bld, gallivm, type);242243assert(lp_check_value(type, y));244assert(lp_check_value(type, u));245assert(lp_check_value(type, v));246247/*248* Constants249*/250251c0 = lp_build_const_int_vec(gallivm, type, 0);252c8 = lp_build_const_int_vec(gallivm, type, 8);253c16 = lp_build_const_int_vec(gallivm, type, 16);254c128 = lp_build_const_int_vec(gallivm, type, 128);255c255 = lp_build_const_int_vec(gallivm, type, 255);256257cy = lp_build_const_int_vec(gallivm, type, 298);258cug = lp_build_const_int_vec(gallivm, type, -100);259cub = lp_build_const_int_vec(gallivm, type, 516);260cvr = lp_build_const_int_vec(gallivm, type, 409);261cvg = lp_build_const_int_vec(gallivm, type, -208);262263/*264* y -= 16;265* u -= 128;266* v -= 128;267*/268269y = LLVMBuildSub(builder, y, c16, "");270u = LLVMBuildSub(builder, u, c128, "");271v = LLVMBuildSub(builder, v, c128, "");272273/*274* r = 298 * _y + 409 * _v + 128;275* g = 298 * _y - 100 * _u - 208 * _v + 128;276* b = 298 * _y + 516 * _u + 128;277*/278279y = LLVMBuildMul(builder, y, cy, "");280y = LLVMBuildAdd(builder, y, c128, "");281282*r = LLVMBuildMul(builder, v, cvr, "");283*g = LLVMBuildAdd(builder,284LLVMBuildMul(builder, u, cug, ""),285LLVMBuildMul(builder, v, cvg, ""),286"");287*b = LLVMBuildMul(builder, u, cub, "");288289*r = LLVMBuildAdd(builder, *r, y, "");290*g = LLVMBuildAdd(builder, *g, y, "");291*b = LLVMBuildAdd(builder, *b, y, "");292293/*294* r >>= 8;295* g >>= 8;296* b >>= 8;297*/298299*r = LLVMBuildAShr(builder, *r, c8, "r");300*g = LLVMBuildAShr(builder, *g, c8, "g");301*b = LLVMBuildAShr(builder, *b, c8, "b");302303/*304* Clamp305*/306307*r = lp_build_clamp(&bld, *r, c0, c255);308*g = lp_build_clamp(&bld, *g, c0, c255);309*b = lp_build_clamp(&bld, *b, c0, c255);310}311312313static LLVMValueRef314rgb_to_rgba_aos(struct gallivm_state *gallivm,315unsigned n,316LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)317{318LLVMBuilderRef builder = gallivm->builder;319struct lp_type type;320LLVMValueRef a;321LLVMValueRef rgba;322323memset(&type, 0, sizeof type);324type.sign = TRUE;325type.width = 32;326type.length = n;327328assert(lp_check_value(type, r));329assert(lp_check_value(type, g));330assert(lp_check_value(type, b));331332/*333* Make a 4 x unorm8 vector334*/335336#if UTIL_ARCH_LITTLE_ENDIAN337r = r;338g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");339b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");340a = lp_build_const_int_vec(gallivm, type, 0xff000000);341#else342r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");343g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");344b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");345a = lp_build_const_int_vec(gallivm, type, 0x000000ff);346#endif347348rgba = r;349rgba = LLVMBuildOr(builder, rgba, g, "");350rgba = LLVMBuildOr(builder, rgba, b, "");351rgba = LLVMBuildOr(builder, rgba, a, "");352353rgba = LLVMBuildBitCast(builder, rgba,354LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");355356return rgba;357}358359360/**361* Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS362*/363static LLVMValueRef364uyvy_to_rgba_aos(struct gallivm_state *gallivm,365unsigned n,366LLVMValueRef packed,367LLVMValueRef i)368{369LLVMValueRef y, u, v;370LLVMValueRef r, g, b;371LLVMValueRef rgba;372373uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);374yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);375rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);376377return rgba;378}379380381/**382* Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS383*/384static LLVMValueRef385yuyv_to_rgba_aos(struct gallivm_state *gallivm,386unsigned n,387LLVMValueRef packed,388LLVMValueRef i)389{390LLVMValueRef y, u, v;391LLVMValueRef r, g, b;392LLVMValueRef rgba;393394yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);395yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);396rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);397398return rgba;399}400401402/**403* Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS404*/405static LLVMValueRef406rgbg_to_rgba_aos(struct gallivm_state *gallivm,407unsigned n,408LLVMValueRef packed,409LLVMValueRef i)410{411LLVMValueRef r, g, b;412LLVMValueRef rgba;413414uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);415rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);416417return rgba;418}419420421/**422* Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS423*/424static LLVMValueRef425grgb_to_rgba_aos(struct gallivm_state *gallivm,426unsigned n,427LLVMValueRef packed,428LLVMValueRef i)429{430LLVMValueRef r, g, b;431LLVMValueRef rgba;432433yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);434rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);435436return rgba;437}438439/**440* Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS441*/442static LLVMValueRef443grbr_to_rgba_aos(struct gallivm_state *gallivm,444unsigned n,445LLVMValueRef packed,446LLVMValueRef i)447{448LLVMValueRef r, g, b;449LLVMValueRef rgba;450451uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);452rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);453454return rgba;455}456457458/**459* Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS460*/461static LLVMValueRef462rgrb_to_rgba_aos(struct gallivm_state *gallivm,463unsigned n,464LLVMValueRef packed,465LLVMValueRef i)466{467LLVMValueRef r, g, b;468LLVMValueRef rgba;469470yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);471rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);472473return rgba;474}475476/**477* @param n is the number of pixels processed478* @param packed is a <n x i32> vector with the packed YUYV blocks479* @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)480* @return a <4*n x i8> vector with the pixel RGBA values in AoS481*/482LLVMValueRef483lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,484const struct util_format_description *format_desc,485unsigned n,486LLVMValueRef base_ptr,487LLVMValueRef offset,488LLVMValueRef i,489LLVMValueRef j)490{491LLVMValueRef packed;492LLVMValueRef rgba;493struct lp_type fetch_type;494495assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);496assert(format_desc->block.bits == 32);497assert(format_desc->block.width == 2);498assert(format_desc->block.height == 1);499500fetch_type = lp_type_uint(32);501packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);502503(void)j;504505switch (format_desc->format) {506case PIPE_FORMAT_UYVY:507rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);508break;509case PIPE_FORMAT_YUYV:510rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);511break;512case PIPE_FORMAT_R8G8_B8G8_UNORM:513rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);514break;515case PIPE_FORMAT_G8R8_G8B8_UNORM:516rgba = grgb_to_rgba_aos(gallivm, n, packed, i);517break;518case PIPE_FORMAT_G8R8_B8R8_UNORM:519rgba = grbr_to_rgba_aos(gallivm, n, packed, i);520break;521case PIPE_FORMAT_R8G8_R8B8_UNORM:522rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);523break;524default:525assert(0);526rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));527break;528}529530return rgba;531}532533534535