Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
9917 views
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

/// A define for abstracting shared memory between shading languages.
///
/// @ingroup GPU
#define FFX_GROUPSHARED shared

/// A define for abstracting compute memory barriers between shading languages.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER() barrier()

/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
/// Expands to nothing in GLSL.
///
/// @ingroup GPU
#define FFX_STATIC

/// A define for abstracting loop unrolling between shading languages.
/// Expands to nothing in GLSL.
///
/// @ingroup GPU
#define FFX_UNROLL

/// A define for abstracting a 'greater than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) greaterThan(x, y)

/// A define for abstracting a 'greater than or equal' comparison operator between two
/// types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y)

/// A define for abstracting a 'less than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) lessThan(x, y)

/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y)

/// A define for abstracting an 'equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) equal(x, y)

/// A define for abstracting a 'not equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) notEqual(x, y)

/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x))

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x))

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x))

/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x))

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x))

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x))

/// Broadcast a scalar
/// value to a 1-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(x) FfxInt32(x)

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x))

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x))

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x))

/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(x) FFX_MIN16_F(x)

/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x))

/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x))

/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x))

/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(x) FFX_MIN16_U(x)

/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x))

/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x))

/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x))

/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(x) FFX_MIN16_I(x)

/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x))

/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x))

/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x))

// Request the GLSL extensions needed by the optional feature toggles, unless the
// including shader opts out by defining FFX_SKIP_EXT.
// NOTE(review): FFX_HALF is tested by value (not with defined()), so it is presumably
// always defined to 0 or 1 before this header is included -- confirm against the build.
#if !defined(FFX_SKIP_EXT)
#if FFX_HALF
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types : require
#endif // FFX_HALF

#if defined(FFX_LONG)
#extension GL_ARB_gpu_shader_int64 : require
#extension GL_NV_shader_atomic_int64 : require
#endif // #if defined(FFX_LONG)

#if defined(FFX_WAVE)
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_quad : require
#extension GL_KHR_shader_subgroup_shuffle : require
#endif // #if defined(FFX_WAVE)
#endif // #if !defined(FFX_SKIP_EXT)

// Forward declarations
FfxFloat32 ffxSqrt(FfxFloat32 x);
FfxFloat32x2 ffxSqrt(FfxFloat32x2 x);
FfxFloat32x3 ffxSqrt(FfxFloat32x3 x);
FfxFloat32x4 ffxSqrt(FfxFloat32x4 x);

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup GLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
    return uintBitsToFloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup GLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
    return uintBitsToFloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup GLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
    return uintBitsToFloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup GLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
    return uintBitsToFloat(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup GLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    return floatBitsToUint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup GLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
    return floatBitsToUint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup GLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
    return floatBitsToUint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup GLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
    return floatBitsToUint(x);
}

/// Convert a 32bit IEEE 754
/// floating point value to its nearest 16bit equivalent.
///
/// The conversion is done with <c><i>packHalf2x16</i></c> using 0.0 as the second component,
/// so the half-precision bits occupy the low 16 bits of the result.
///
/// @param [in] value The value to convert.
///
/// @returns
/// The nearest 16bit equivalent of <c><i>value</i></c>.
///
/// @ingroup GLSL
FfxUInt32 f32tof16(FfxFloat32 value)
{
    return packHalf2x16(FfxFloat32x2(value, 0.0));
}

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
    return FfxFloat32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
    return FfxFloat32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
    return FfxFloat32x4(value, value, value, value);
}

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
    return FfxInt32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each
/// component.
///
/// @ingroup GLSL
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
    return FfxInt32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
    return FfxInt32x4(value, value, value, value);
}

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
    return FfxUInt32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
    return FfxUInt32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
    return FfxUInt32x4(value, value, value, value);
}

/// Extract a bit field from an unsigned integer using unsigned offset and width arguments.
///
/// Forwards to the built-in GLSL <c><i>bitfieldExtract</i></c>, converting <c><i>off</i></c> and
/// <c><i>bits</i></c> to signed integers as the built-in expects.
///
/// @param [in] src The value to extract the bits from.
/// @param [in] off The index of the first (least significant) bit to extract.
/// @param [in] bits The number of bits to extract.
///
/// @returns
/// The extracted bits, placed in the least significant bits of the result.
///
/// @ingroup GLSL
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
    return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
}

/// Merge two values under a bit mask.
///
/// @param [in] src The value supplying the result bits where <c><i>mask</i></c> is 0.
/// @param [in] ins The value supplying the result bits where <c><i>mask</i></c> is 1.
/// @param [in] mask The per-bit selection mask.
///
/// @returns
/// <c><i>(ins & mask) | (src & ~mask)</i></c>.
///
/// @ingroup GLSL
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
    return (ins & mask) | (src & (~mask));
}

// Proxy for V_BFI_B32 where the 'mask' is set as
// 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
///
/// Insert the low <c><i>bits</i></c> bits of <c><i>ins</i></c> into <c><i>src</i></c> at bit offset 0
/// by calling the built-in four-argument GLSL <c><i>bitfieldInsert</i></c>.
///
/// @param [in] src The value to insert the bits into.
/// @param [in] ins The value supplying the bits to insert.
/// @param [in] bits The number of low bits of <c><i>ins</i></c> to insert.
///
/// @returns
/// <c><i>src</i></c> with its low <c><i>bits</i></c> bits replaced by those of <c><i>ins</i></c>.
///
/// @ingroup GLSL
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
    return bitfieldInsert(src, ins, 0, FfxInt32(bits));
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function.
/// Implements the following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function.
/// Implements the following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
    return mix(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function.
/// Implements the following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
    return mix(x, y, t);
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first
/// value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
///
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    return max(x, max(y, z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}


/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
/// GCN and RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return min(x, min(y, z));
}

/// Compute
/// the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The
/// first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
/// GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    return min(x, min(y, z));
}

/// Compute the reciprocal of a value.
///
/// NOTE: This function is only provided for GLSL.
/// In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 rcp(FfxFloat32 x)
{
    return FfxFloat32(1.0) / x;
}

/// Compute the reciprocal of a value.
///
/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 rcp(FfxFloat32x2 x)
{
    return ffxBroadcast2(1.0) / x;
}

/// Compute the reciprocal of a value.
///
/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 rcp(FfxFloat32x3 x)
{
    return ffxBroadcast3(1.0) / x;
}

/// Compute the reciprocal of a value.
///
/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 rcp(FfxFloat32x4 x)
{
    return ffxBroadcast4(1.0) / x;
}

/// Compute the reciprocal square root of a value.
///
/// NOTE: This function is only provided for GLSL.
/// In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 rsqrt(FfxFloat32 x)
{
    return FfxFloat32(1.0) / ffxSqrt(x);
}

/// Compute the reciprocal square root of a value.
///
/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 rsqrt(FfxFloat32x2 x)
{
    return ffxBroadcast2(1.0) / ffxSqrt(x);
}

/// Compute the reciprocal square root of a value.
///
/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 rsqrt(FfxFloat32x3 x)
{
    return ffxBroadcast3(1.0) / ffxSqrt(x);
}

/// Compute the reciprocal square root of a value.
///
/// NOTE: This function is only provided for GLSL.
/// In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
///
/// @param [in] x The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 rsqrt(FfxFloat32x4 x)
{
    return ffxBroadcast4(1.0) / ffxSqrt(x);
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
    return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0));
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
    return clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0));
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
    return clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0));
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
    return clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0));
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic GLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware.
/// It is worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
    return fract(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic GLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
    return fract(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic GLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
    return fract(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>.
/// Where <c><i>floor</i></c> is the intrinsic GLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
    return fract(x);
}

/// Shift an unsigned value right using a signed shift.
///
/// Both operands are converted to <c><i>FfxInt32</i></c> before the shift and the result is converted back to
/// <c><i>FfxUInt32</i></c>, so the shift is sign-extending (assuming <c><i>FfxInt32</i></c> is a signed integer type).
///
/// @param [in] a The value to shift.
/// @param [in] b The number of bits to shift by.
///
/// @returns
/// <c><i>a</i></c> shifted right by <c><i>b</i></c> bits.
///
/// @ingroup GLSL
FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}

#if FFX_HALF

/// Unpack one 32-bit word into two 16-bit floats.
#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))

/// Unpack two 32-bit words into four 16-bit floats (<c><i>x.x</i></c> first, then <c><i>x.y</i></c>).
FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
    return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y));
}
#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x))
#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x)))
//------------------------------------------------------------------------------------------------------------------------------
/// Pack two 16-bit floats into one 32-bit word.
#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x))

/// Pack four 16-bit floats into two 32-bit words (<c><i>x.xy</i></c> first, then <c><i>x.zw</i></c>).
FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x)
{
    return FfxUInt32x2(packFloat2x16(x.xy), packFloat2x16(x.zw));
}
#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x))
#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x))
#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x)))
//==============================================================================================================================
// Conversions from 16-bit float values to 16-bit unsigned values via halfBitsToUint16.
#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x))
#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x))
#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x))
#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x))
//------------------------------------------------------------------------------------------------------------------------------
// Conversions from 16-bit unsigned values to 16-bit float values via uint16BitsToHalf.
#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x))
#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x))
#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x))
#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x))
//==============================================================================================================================
// Broadcast a 16-bit float scalar to every component of the result.
FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a)
{
    return FfxFloat16(a);
}
FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a)
{
    return FfxFloat16x2(a, a);
}
FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a)
{
    return FfxFloat16x3(a, a, a);
}
FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a)
{
    return FfxFloat16x4(a, a, a, a);
}
#define FFX_BROADCAST_FLOAT16(a) FfxFloat16(a)
#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a))
#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a))
#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a))
//------------------------------------------------------------------------------------------------------------------------------
// Broadcast a 16-bit signed integer scalar to every component of the result.
FfxInt16 ffxBroadcastInt16(FfxInt16 a)
{
    return FfxInt16(a);
}
FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a)
{
    return FfxInt16x2(a, a);
}
FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a)
{
    return FfxInt16x3(a, a, a);
}
FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a)
{
    return FfxInt16x4(a, a, a, a);
}
#define FFX_BROADCAST_INT16(a) FfxInt16(a)
#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a))
#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a))
#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a))
//------------------------------------------------------------------------------------------------------------------------------
// Broadcast a 16-bit unsigned integer scalar to every component of the result.
FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a)
{
    return FfxUInt16(a);
}
FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a)
{
    return FfxUInt16x2(a, a);
}
FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a)
{
    return FfxUInt16x3(a, a, a);
}
FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a)
{
    return FfxUInt16x4(a, a, a, a);
}
#define FFX_BROADCAST_UINT16(a) FfxUInt16(a)
#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a))
#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a))
#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a))
//==============================================================================================================================
// Absolute value of 'a' after converting it to the matching FfxInt16 type; the result is converted back to unsigned.
FfxUInt16 ffxAbsHalf(FfxUInt16 a)
{
    return FfxUInt16(abs(FfxInt16(a)));
}
FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a)
{
    return FfxUInt16x2(abs(FfxInt16x2(a)));
}
FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a)
{
    return FfxUInt16x3(abs(FfxInt16x3(a)));
}
FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a)
{
    return FfxUInt16x4(abs(FfxInt16x4(a)));
}
//------------------------------------------------------------------------------------------------------------------------------
// Clamp 'x' to the range [n..m].
FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m)
{
    return clamp(x, n, m);
}
FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m)
{
    return clamp(x, n, m);
}
FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m)
{
    return clamp(x, n, m);
}
FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m)
{
    return clamp(x, n, m);
}
//------------------------------------------------------------------------------------------------------------------------------
// Fractional part of 'x', using the GLSL fract built-in.
FfxFloat16 ffxFract(FfxFloat16 x)
{
    return fract(x);
}
FfxFloat16x2 ffxFract(FfxFloat16x2 x)
{
    return fract(x);
}
FfxFloat16x3 ffxFract(FfxFloat16x3 x)
{
    return fract(x);
}
FfxFloat16x4 ffxFract(FfxFloat16x4 x)
{
    return fract(x);
}
//------------------------------------------------------------------------------------------------------------------------------
// Linear interpolation between 'x' and 'y' by 'a', using the GLSL mix built-in. Vector overloads accept either a
// scalar or a per-component interpolation factor.
FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a)
{
    return mix(x, y, a);
}
FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a)
{
    return mix(x, y, a);
}
FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a)
{
    return mix(x, y, a);
}
FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a)
{
    return mix(x, y, a);
}
FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a)
{
    return mix(x, y, a);
}
FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a)
{
    return mix(x, y, a);
}
FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
{
    return mix(x, y, a);
}
//------------------------------------------------------------------------------------------------------------------------------
// No packed version of ffxMed3.
// Median of three values: the larger of min(x, y) and min(max(x, y), z).
FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}
FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}
//------------------------------------------------------------------------------------------------------------------------------
// No packed version of ffxMax3.
// Maximum of three values.
FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    return max(x, max(y, z));
}
FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    return max(x, max(y, z));
}
FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    return max(x, max(y, z));
}
FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    return max(x, max(y, z));
}
//------------------------------------------------------------------------------------------------------------------------------
// No packed version of ffxMin3.
// Minimum of three values.
FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    return min(x, min(y, z));
}
FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    return min(x, min(y, z));
}
FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    return min(x, min(y, z));
}
FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    return min(x, min(y, z));
}
//------------------------------------------------------------------------------------------------------------------------------
// Reciprocal of 'x', computed as 1.0 / x.
FfxFloat16 ffxReciprocalHalf(FfxFloat16 x)
{
    return FFX_BROADCAST_FLOAT16(1.0) / x;
}
FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x)
{
    return FFX_BROADCAST_FLOAT16X2(1.0) / x;
}
FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x)
{
    return FFX_BROADCAST_FLOAT16X3(1.0) / x;
}
FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x)
{
    return FFX_BROADCAST_FLOAT16X4(1.0) / x;
}
//------------------------------------------------------------------------------------------------------------------------------
// Reciprocal square root of 'x', computed as 1.0 / sqrt(x).
FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x)
{
    return FFX_BROADCAST_FLOAT16(1.0) / sqrt(x);
}
FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x)
{
    return FFX_BROADCAST_FLOAT16X2(1.0) / sqrt(x);
}
FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x)
{
    return FFX_BROADCAST_FLOAT16X3(1.0) / sqrt(x);
}
FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x)
{
    return FFX_BROADCAST_FLOAT16X4(1.0) / sqrt(x);
}
//------------------------------------------------------------------------------------------------------------------------------
// Clamp 'x' to the [0..1] range.
FfxFloat16 ffxSaturate(FfxFloat16 x)
{
    return clamp(x, FFX_BROADCAST_FLOAT16(0.0), FFX_BROADCAST_FLOAT16(1.0));
}
FfxFloat16x2 ffxSaturate(FfxFloat16x2 x)
{
    return clamp(x, FFX_BROADCAST_FLOAT16X2(0.0), FFX_BROADCAST_FLOAT16X2(1.0));
}
FfxFloat16x3 ffxSaturate(FfxFloat16x3 x)
{
    return clamp(x, FFX_BROADCAST_FLOAT16X3(0.0), FFX_BROADCAST_FLOAT16X3(1.0));
}
FfxFloat16x4 ffxSaturate(FfxFloat16x4 x)
{
    return clamp(x, FFX_BROADCAST_FLOAT16X4(0.0), FFX_BROADCAST_FLOAT16X4(1.0));
}
//------------------------------------------------------------------------------------------------------------------------------
// Shift 'a' right by 'b' after converting both to the matching FfxInt16 type; the result is converted back to
// unsigned.
FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b)
{
    return FfxUInt16(FfxInt16(a) >> FfxInt16(b));
}
FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b)
{
    return FfxUInt16x2(FfxInt16x2(a) >> FfxInt16x2(b));
}
FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b)
{
    return FfxUInt16x3(FfxInt16x3(a) >> FfxInt16x3(b));
}
FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b)
{
    return FfxUInt16x4(FfxInt16x4(a) >> FfxInt16x4(b));
}
#endif // FFX_HALF

#if defined(FFX_WAVE)
// Wrappers around subgroupShuffleXor for each supported value type.
// Where 'x' must be a compile time literal.
FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}
FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
{
    return subgroupShuffleXor(v, x);
}

//------------------------------------------------------------------------------------------------------------------------------
#if FFX_HALF
// 16-bit variants: the value is packed into 32-bit words, shuffled, then unpacked again.
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
{
    return FFX_UINT32_TO_FLOAT16X2(subgroupShuffleXor(FFX_FLOAT16X2_TO_UINT32(v), x));
}
FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
{
    return FFX_UINT32X2_TO_FLOAT16X4(subgroupShuffleXor(FFX_FLOAT16X4_TO_UINT32X2(v), x));
}
FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
{
    return FFX_UINT32_TO_UINT16X2(subgroupShuffleXor(FFX_UINT16X2_TO_UINT32(v), x));
}
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
    return FFX_UINT32X2_TO_UINT16X4(subgroupShuffleXor(FFX_UINT16X4_TO_UINT32X2(v), x));
}
#endif // FFX_HALF
#endif // #if defined(FFX_WAVE)