// Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h (9917 views)
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef FFX_FSR2_SAMPLE_H
#define FFX_FSR2_SAMPLE_H

// suppress warnings
#ifdef FFX_HLSL
#pragma warning(disable: 4008) // potentially divide by zero
#endif //FFX_HLSL

// 2x2 texel neighbourhood used by Bilinear().
// Field naming is fColor<x><y>: first digit is the column, second the row.
struct FetchedBilinearSamples {

    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;

    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
};

// 4x4 texel neighbourhood used by the Lanczos2* kernels.
// Field naming is fColor<x><y>: first digit is the column, second the row.
struct FetchedBicubicSamples {

    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;
    FfxFloat32x4 fColor20;
    FfxFloat32x4 fColor30;

    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
    FfxFloat32x4 fColor21;
    FfxFloat32x4 fColor31;

    FfxFloat32x4 fColor02;
    FfxFloat32x4 fColor12;
    FfxFloat32x4 fColor22;
    FfxFloat32x4 fColor32;

    FfxFloat32x4 fColor03;
    FfxFloat32x4 fColor13;
    FfxFloat32x4 fColor23;
    FfxFloat32x4 fColor33;
};

// Reduced-precision variants of the sample structs. When FFX_HALF is not
// enabled the Min16 names are simply aliases of the full-precision structs.
#if FFX_HALF
struct FetchedBilinearSamplesMin16 {

    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;

    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
};

struct FetchedBicubicSamplesMin16 {

    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;
    FFX_MIN16_F4 fColor20;
    FFX_MIN16_F4 fColor30;

    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
    FFX_MIN16_F4 fColor21;
    FFX_MIN16_F4 fColor31;

    FFX_MIN16_F4 fColor02;
    FFX_MIN16_F4 fColor12;
    FFX_MIN16_F4 fColor22;
    FFX_MIN16_F4 fColor32;

    FFX_MIN16_F4 fColor03;
    FFX_MIN16_F4 fColor13;
    FFX_MIN16_F4 fColor23;
    FFX_MIN16_F4 fColor33;
};
#else //FFX_HALF
#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
#endif //FFX_HALF

// Linear interpolation: returns A when t == 0 and B when t == 1.
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    return A + (B - A) * t;
}

// Bilinear interpolation of a 2x2 neighbourhood: interpolate both rows along x
// with fPxFrac.x, then interpolate the two row results along y with fPxFrac.y.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
    return fColorXY;
}

#if FFX_HALF
// Reduced-precision overload of Linear().
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
{
    return A + (B - A) * t;
}

// Reduced-precision overload of Bilinear().
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
    FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
    return fColorXY;
}
#endif

// Lanczos-2 weight sinc(x) * sinc(x / 2), with sinc(v) = sin(PI * v) / (PI * v).
// Returns 1 when |x| < FSR2_EPSILON to avoid the 0 / 0 at the origin.
// The input is not range-limited here; see Lanczos2() for the clamped version.
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
    return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x));
}

// Lanczos-2 weight with |x| limited to at most 2 before evaluation.
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    x = ffxMin(abs(x), 2.0f);
    return Lanczos2NoClamp(x);
}

#if FFX_HALF

// The reduced-precision Lanczos2NoClamp overload is compiled out, so the
// reduced-precision Lanczos2() below evaluates the FfxFloat32 overload and
// converts the result back to FFX_MIN16_F.
#if 0
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
{
    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
    return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
}
#endif

// Reduced-precision overload of Lanczos2(): |x| is limited to at most 2.
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
    x = ffxMin(abs(x), FFX_MIN16_F(2.0f));
    return FFX_MIN16_F(Lanczos2NoClamp(x));
}
#endif //FFX_HALF

// FSR1 lanczos approximation. Input is x*x and must be <= 4.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    FfxFloat32 a = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 b = (1.0f / 4.0f) * x2 - 1;
    return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b);
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2ApproxSqNoClamp(). Input is x*x and must be <= 4.
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
{
    FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
    FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
    return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b);
}
#endif //FFX_HALF

// Lanczos approximation on a squared distance; x2 is limited to at most 4 first.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    x2 = ffxMin(x2, 4.0f);
    return Lanczos2ApproxSqNoClamp(x2);
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2ApproxSq().
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
{
    x2 = ffxMin(x2, FFX_MIN16_F(4.0f));
    return Lanczos2ApproxSqNoClamp(x2);
}
#endif //FFX_HALF

// Lanczos approximation on a (non-squared) distance, without range limiting.
// The caller must keep x*x <= 4.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    return Lanczos2ApproxSqNoClamp(x * x);
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2ApproxNoClamp().
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
{
    return Lanczos2ApproxSqNoClamp(x * x);
}
#endif //FFX_HALF

// Lanczos approximation on a (non-squared) distance; x*x is limited to at most 4.
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    return Lanczos2ApproxSq(x * x);
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2Approx().
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
{
    return Lanczos2ApproxSq(x * x);
}
#endif //FFX_HALF

// Lanczos weight obtained from SampleLanczos2Weight(|x|), which is declared
// outside this file (presumably a lookup-table fetch, going by the name).
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    return SampleLanczos2Weight(abs(x));
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2_UseLUT().
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
    return FFX_MIN16_F(SampleLanczos2Weight(abs(x)));
}
#endif //FFX_HALF

// 1D four-tap filter using Lanczos2_UseLUT weights. The taps fColor0..fColor3
// sit at offsets -1, 0, +1 and +2 from the base texel and t is the fractional
// position; the weighted sum is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t);
    FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t);
    FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t);
    FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#if FFX_HALF
// Reduced-precision overload of the four-tap Lanczos2_UseLUT() filter.
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#endif

// 1D four-tap filter using the analytic Lanczos2() weights. The taps
// fColor0..fColor3 sit at offsets -1, 0, +1 and +2 from the base texel and t is
// the fractional position; the weighted sum is normalised by the weight sum.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fWeight0 = Lanczos2(-1.f - t);
    FfxFloat32 fWeight1 = Lanczos2(-0.f - t);
    FfxFloat32 fWeight2 = Lanczos2(+1.f - t);
    FfxFloat32 fWeight3 = Lanczos2(+2.f - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}

// Separable 4x4 Lanczos2 filter: each of the four rows is filtered along x with
// fPxFrac.x, then the four row results are filtered along y with fPxFrac.y.
// The result is then clamped to the min/max of the four central samples.
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {

        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}

#if FFX_HALF
// Reduced-precision overload of the four-tap Lanczos2() filter.
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}

// Reduced-precision overload of the separable 4x4 Lanczos2() filter, including
// the clamp of the result to the min/max of the four central samples.
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FFX_MIN16_F4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
    {
        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
#endif //FFX_HALF


// Separable 4x4 filter using Lanczos2_UseLUT weights: rows are filtered along x
// with fPxFrac.x, then the row results along y with fPxFrac.y. The result is
// then clamped to the min/max of the four central samples.
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {

        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}

#if FFX_HALF
// Reduced-precision overload of Lanczos2LUT().
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FFX_MIN16_F4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
    {
        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
#endif //FFX_HALF



// 1D four-tap filter using the unclamped polynomial approximation. The largest
// argument magnitude is 1 + t (first tap), so its square stays <= 4 only while
// t is in [0, 1].
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t);
    FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t);
    FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t);
    FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}

#if FFX_HALF
// Reduced-precision overload of the four-tap Lanczos2Approx() filter.
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3);
}
#endif //FFX_HALF

// Separable 4x4 filter using the polynomial Lanczos approximation: rows are
// filtered along x with fPxFrac.x, then the row results along y with fPxFrac.y.
// The result is then clamped to the min/max of the four central samples.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FfxFloat32x4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
    {
        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}

#if FFX_HALF
// Reduced-precision overload of the separable 4x4 Lanczos2Approx() filter.
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
    FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);

    // Deringing

    // TODO: only use 4 by checking jitter
    const FfxInt32 iDeringingSampleCount = 4;
    const FFX_MIN16_F4 fDeringingSamples[4] = {
        Samples.fColor11,
        Samples.fColor21,
        Samples.fColor12,
        Samples.fColor22,
    };

    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];

    FFX_UNROLL
    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
    {
        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
    }

    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);

    return fColorXY;
}
#endif

// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
// Only the side the offset points towards is clamped: negative offsets are
// clamped against 0, positive offsets against iTextureSize - 1, and a zero
// offset is passed through unchanged.
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 result = iPxSample + iPxOffset;
    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x;
    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x;
    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y;
    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y;
    return result;
}
#if FFX_HALF
// Reduced-precision integer overload of ClampCoord(); same direction-dependent clamping.
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
    FFX_MIN16_I2 result = iPxSample + iPxOffset;
    result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
    result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
    result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
    result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
    return result;
}
#endif //FFX_HALF


// Declares a function `Name(iPxSample, iTextureSize)` that loads the 4x4
// neighbourhood spanning offsets -1..+2 around iPxSample through `LoadTexture`,
// with every coordinate passed through ClampCoord(), and returns it as
// `SampleType` with each texel converted to `TextureType`.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
        \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
        \
        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
        \
        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
        \
        return Samples; \
    }

// Full-precision 4x4 fetch function with FfxInt32x2 addressing.
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Reduced-precision 4x4 fetch function; addressing stays FfxInt32x2.
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)

// Declares a function `Name(iPxSample, iTextureSize)` that loads the 2x2
// neighbourhood at offsets 0..+1 from iPxSample through `LoadTexture`, with
// every coordinate passed through ClampCoord().
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        return Samples; \
    }

// Full-precision 2x2 fetch function with FfxInt32x2 addressing.
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Reduced-precision 2x2 fetch function; addressing stays FfxInt32x2.
#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)

// Declares a function `Name(fUvSample, iTextureSize)` that converts a UV to a
// texel-space position (texel centres at integer coordinates), splits it into
// an integer base texel and a fractional part, fetches the neighbourhood with
// `FetchSamples` and filters it with `InterpolateSamples`.
//
// BE CAREFUL: there are some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
// is common, so iPxSample can "jitter"
//
// NOTE(review): the upper clamp bound is iTextureSize, not iTextureSize - 1, so
// a position clamped to exactly iTextureSize floors to a base texel one past the
// last valid index, and ClampCoord() leaves zero offsets unclamped. Confirm
// callers never pass UVs that far outside [0, 1].
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }

// Reduced-precision counterpart of DeclareCustomTextureSample: the coordinate
// math stays in FfxFloat32, while the fractional part and the filtered colour
// are FFX_MIN16_F2 / FFX_MIN16_F4. The same clamp-bound review note applies.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }

// Two-level token paste so that macro arguments are expanded before `##` is applied.
#define FFX_FSR2_CONCAT_ID(x, y) x ## y
#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)

// Sampler selection table: index 0 = Lanczos2, 1 = Lanczos2LUT, 2 = Lanczos2Approx.
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

// Expands to the sampler function name selected by x (0, 1 or 2).
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)

#endif //!defined( FFX_FSR2_SAMPLE_H )