// Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
// 9899 views
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

/// A define for abstracting shared memory between shading languages.
///
/// @ingroup GPU
#define FFX_GROUPSHARED groupshared

/// A define for abstracting compute memory barriers between shading languages.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync

/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
///
/// @ingroup GPU
#define FFX_STATIC static

/// A define for abstracting loop unrolling between shading languages.
///
/// @ingroup GPU
#define FFX_UNROLL [unroll]

// NOTE: The comparison macros below parenthesize both their arguments and the whole
// expansion so that they behave as a single expression regardless of the operators
// used in the arguments or around the macro invocation.

/// A define for abstracting a 'greater than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) ((x) > (y))

/// A define for abstracting a 'greater than or equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))

/// A define for abstracting a 'less than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) ((x) < (y))

/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))

/// A define for abstracting an 'equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) ((x) == (y))

/// A define for abstracting a 'not equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) ((x) != (y))

// NOTE: The 32-bit FFX_BROADCAST_* macros below all expand to a scalar cast; the vector
// variants rely on HLSL implicitly promoting that scalar to the vector type at the point
// of use. The FFX_BROADCAST_MIN_* variants forward to FFX_MIN16_F / FFX_MIN16_U /
// FFX_MIN16_I, which are defined outside this section.

/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)

/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)

/// Broadcast a scalar value to a 1-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(x) FfxInt32(x)

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)

/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a)

/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)

/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)

/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)

/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a)

/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)

/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)

/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)

/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a)

/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)

/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)

/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)

/// Pack 2x32-bit floating point values in a single 32bit value.
///
/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
/// 32bit unsigned integer respectively.
///
/// @param [in] value A 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A packed 32bit value containing 2 16bit floating point values.
///
/// @ingroup HLSL
FfxUInt32 packHalf2x16(FfxFloat32x2 value)
{
    return f32tof16(value.x) | (f32tof16(value.y) << 16);
}

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
    return FfxFloat32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
    return FfxFloat32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
    return FfxFloat32x4(value, value, value, value);
}

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
    return FfxInt32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
    // The result type is signed, matching the signed 2- and 4-dimensional overloads.
    return FfxInt32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
    return FfxInt32x4(value, value, value, value);
}

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
    return FfxUInt32x2(value, value);
}

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
    return FfxUInt32x3(value, value, value);
}

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @param [in] value The value to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
    return FfxUInt32x4(value, value, value, value);
}

/// Extract a run of bits from a 32bit unsigned integer.
///
/// Shifts <c><i>src</i></c> right by <c><i>off</i></c> and keeps the lowest <c><i>bits</i></c> bits of the result.
///
/// NOTE(review): the mask is built as <c><i>(1u << bits) - 1</i></c>, so <c><i>bits</i></c> is presumably
/// expected to be less than 32 - confirm against callers.
///
/// @param [in] src The value to extract bits from.
/// @param [in] off The index of the lowest bit to extract.
/// @param [in] bits The number of bits to extract.
///
/// @returns
/// The extracted bits, placed in the low bits of the result.
///
/// @ingroup HLSL
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
    FfxUInt32 mask = (1u << bits) - 1;
    return (src >> off) & mask;
}

/// Merge two 32bit unsigned integers according to a bit mask.
///
/// Each bit that is set in <c><i>mask</i></c> is taken from <c><i>ins</i></c>; every other bit is taken
/// from <c><i>src</i></c>.
///
/// @param [in] src The value supplying the bits where <c><i>mask</i></c> is clear.
/// @param [in] ins The value supplying the bits where <c><i>mask</i></c> is set.
/// @param [in] mask The bit mask selecting between <c><i>ins</i></c> and <c><i>src</i></c>.
///
/// @returns
/// The merged value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
    return (ins & mask) | (src & (~mask));
}

/// Replace the lowest bits of a 32bit unsigned integer with the lowest bits of another.
///
/// The lowest <c><i>bits</i></c> bits of the result are taken from <c><i>ins</i></c>; the remaining bits
/// are taken from <c><i>src</i></c>.
///
/// NOTE(review): the mask is built as <c><i>(1u << bits) - 1</i></c>, so <c><i>bits</i></c> is presumably
/// expected to be less than 32 - confirm against callers.
///
/// @param [in] src The value supplying the upper bits.
/// @param [in] ins The value supplying the lowest <c><i>bits</i></c> bits.
/// @param [in] bits The number of low bits to take from <c><i>ins</i></c>.
///
/// @returns
/// The merged value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
    FfxUInt32 mask = (1u << bits) - 1;
    return (ins & mask) | (src & (~mask));
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    return asuint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
    return asuint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
    return asuint(x);
}

/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
    return asuint(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
    return asfloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
    return asfloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
    return asfloat(x);
}

/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] x The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
    return asfloat(x);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
    return lerp(x, y, t);
}

/// Compute the linear interpolation between two values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
    return lerp(x, y, t);
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
    return saturate(x);
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
    return saturate(x);
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
    return saturate(x);
}

/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
    return saturate(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
    return x - floor(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
    return x - floor(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
    return x - floor(x);
}

/// Compute the fractional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
    return x - floor(x);
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    return max(x, max(y, z));
}

/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calculation.
/// @param [in] z The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    return max(x, max(y, z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calculation.
/// @param [in] z The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
    return max(min(x, y), min(max(x, y), z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calculation.
/// @param [in] z The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    return min(x, min(y, z));
}

/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// 
@param [in] y The second value to include in the min calcuation.1115/// @param [in] z The third value to include in the min calcuation.1116///1117/// @returns1118/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.1119///1120/// @ingroup HLSL1121FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)1122{1123return min(x, min(y, z));1124}112511261127FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)1128{1129return FfxUInt32(FfxInt32(a) >> FfxInt32(b));1130}11311132//==============================================================================================================================1133// HLSL HALF1134//==============================================================================================================================1135#if FFX_HALF11361137//==============================================================================================================================1138// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).1139// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/1140FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)1141{1142FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));1143return FFX_MIN16_F2(t);1144}1145FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)1146{1147return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y));1148}1149FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)1150{1151FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16);1152return FFX_MIN16_U2(t);1153}1154FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)1155{1156return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));1157}1158#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))1159#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))1160#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))1161#define FFX_UINT32X2_TO_UINT16X4(x) 
ffxUint32x2ToUint16x4(FfxUInt32x2(x))1162//------------------------------------------------------------------------------------------------------------------------------1163FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)1164{1165return f32tof16(x.x) + (f32tof16(x.y) << 16);1166}1167FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)1168{1169return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw));1170}1171FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)1172{1173return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16);1174}1175FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)1176{1177return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw));1178}1179#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x))1180#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x))1181#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))1182#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))11831184#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)1185#define FFX_TO_UINT16(x) asuint16(x)1186#define FFX_TO_UINT16X2(x) asuint16(x)1187#define FFX_TO_UINT16X3(x) asuint16(x)1188#define FFX_TO_UINT16X4(x) asuint16(x)1189#else1190#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))1191#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))1192#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))1193#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))1194#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)11951196#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)1197#define FFX_TO_FLOAT16(x) asfloat16(x)1198#define FFX_TO_FLOAT16X2(x) asfloat16(x)1199#define FFX_TO_FLOAT16X3(x) asfloat16(x)1200#define FFX_TO_FLOAT16X4(x) asfloat16(x)1201#else1202#define FFX_TO_FLOAT16(a) 
FFX_MIN16_F(f16tof32(FfxUInt32(a)))1203#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))1204#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))1205#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))1206#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)12071208//==============================================================================================================================1209#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a)1210#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a)1211#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a)1212#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a)12131214//------------------------------------------------------------------------------------------------------------------------------1215#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a)1216#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a)1217#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a)1218#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a)12191220//------------------------------------------------------------------------------------------------------------------------------1221#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a)1222#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a)1223#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a)1224#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)12251226//==============================================================================================================================1227FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)1228{1229return FFX_MIN16_U(abs(FFX_MIN16_I(a)));1230}1231FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)1232{1233return FFX_MIN16_U2(abs(FFX_MIN16_I2(a)));1234}1235FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)1236{1237return FFX_MIN16_U3(abs(FFX_MIN16_I3(a)));1238}1239FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)1240{1241return 
FFX_MIN16_U4(abs(FFX_MIN16_I4(a)));1242}1243//------------------------------------------------------------------------------------------------------------------------------1244FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)1245{1246return max(n, min(x, m));1247}1248FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)1249{1250return max(n, min(x, m));1251}1252FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)1253{1254return max(n, min(x, m));1255}1256FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)1257{1258return max(n, min(x, m));1259}1260//------------------------------------------------------------------------------------------------------------------------------1261// V_FRACT_F16 (note DX frac() is different).1262FFX_MIN16_F ffxFract(FFX_MIN16_F x)1263{1264return x - floor(x);1265}1266FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)1267{1268return x - floor(x);1269}1270FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)1271{1272return x - floor(x);1273}1274FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)1275{1276return x - floor(x);1277}1278//------------------------------------------------------------------------------------------------------------------------------1279FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)1280{1281return lerp(x, y, a);1282}1283FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)1284{1285return lerp(x, y, a);1286}1287FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)1288{1289return lerp(x, y, a);1290}1291FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)1292{1293return lerp(x, y, a);1294}1295FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)1296{1297return lerp(x, y, a);1298}1299FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)1300{1301return lerp(x, y, a);1302}1303FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)1304{1305return lerp(x, y, 
a);1306}1307//------------------------------------------------------------------------------------------------------------------------------1308FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)1309{1310return max(x, max(y, z));1311}1312FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)1313{1314return max(x, max(y, z));1315}1316FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)1317{1318return max(x, max(y, z));1319}1320FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)1321{1322return max(x, max(y, z));1323}1324//------------------------------------------------------------------------------------------------------------------------------1325FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)1326{1327return min(x, min(y, z));1328}1329FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)1330{1331return min(x, min(y, z));1332}1333FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)1334{1335return min(x, min(y, z));1336}1337FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)1338{1339return min(x, min(y, z));1340}1341//------------------------------------------------------------------------------------------------------------------------------1342FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)1343{1344return max(min(x, y), min(max(x, y), z));1345}1346FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)1347{1348return max(min(x, y), min(max(x, y), z));1349}1350FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)1351{1352return max(min(x, y), min(max(x, y), z));1353}1354FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)1355{1356return max(min(x, y), min(max(x, y), z));1357}1358//------------------------------------------------------------------------------------------------------------------------------1359FFX_MIN16_I 
ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)1360{1361return max(min(x, y), min(max(x, y), z));1362}1363FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)1364{1365return max(min(x, y), min(max(x, y), z));1366}1367FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)1368{1369return max(min(x, y), min(max(x, y), z));1370}1371FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)1372{1373return max(min(x, y), min(max(x, y), z));1374}1375//------------------------------------------------------------------------------------------------------------------------------1376FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)1377{1378return rcp(x);1379}1380FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)1381{1382return rcp(x);1383}1384FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)1385{1386return rcp(x);1387}1388FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)1389{1390return rcp(x);1391}1392//------------------------------------------------------------------------------------------------------------------------------1393FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)1394{1395return rsqrt(x);1396}1397FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)1398{1399return rsqrt(x);1400}1401FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)1402{1403return rsqrt(x);1404}1405FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)1406{1407return rsqrt(x);1408}1409//------------------------------------------------------------------------------------------------------------------------------1410FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)1411{1412return saturate(x);1413}1414FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)1415{1416return saturate(x);1417}1418FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)1419{1420return saturate(x);1421}1422FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)1423{1424return 
saturate(x);1425}1426//------------------------------------------------------------------------------------------------------------------------------1427FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)1428{1429return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b));1430}1431FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)1432{1433return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b));1434}1435FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)1436{1437return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b));1438}1439FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)1440{1441return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b));1442}1443#endif // FFX_HALF14441445//==============================================================================================================================1446// HLSL WAVE1447//==============================================================================================================================1448#if defined(FFX_WAVE)1449// Where 'x' must be a compile time literal.1450FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)1451{1452return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1453}1454FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)1455{1456return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1457}1458FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)1459{1460return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1461}1462FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)1463{1464return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1465}1466FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)1467{1468return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1469}1470FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)1471{1472return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1473}1474FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)1475{1476return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);1477}1478FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)1479{1480return WaveReadLaneAt(v, 
WaveGetLaneIndex() ^ x);1481}14821483#if FFX_HALF1484FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)1485{1486return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));1487}1488FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)1489{1490return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));1491}1492FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)1493{1494return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));1495}1496FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)1497{1498return AW4_FFX_UINT32(WaveReadLaneAt(FFX_UINT32_AW4(v), WaveGetLaneIndex() ^ x));1499}1500#endif // FFX_HALF1501#endif // #if defined(FFX_WAVE)150215031504