Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
9898 views
// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef FFX_FSR2_UPSAMPLE_H
#define FFX_FSR2_UPSAMPLE_H

// Capacity of the local sample array used by ComputeUpsampledColorAndWeight.
// Samples are indexed with a row stride of 4 (col + row * 4).
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16;

// Clamps fColor component-wise to the [aabbMin, aabbMax] range of clippingBox.
void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
{
    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
#if FFX_HALF
// Min16 overload of Deringing: same clamp against the box's aabbMin / aabbMax.
void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
{
    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
}
#endif

// Selects which Lanczos2 evaluation GetUpsampleLanczosWeight uses:
//   0 = reference, 1 = LUT, 2 = approximate (default).
#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
#endif

// Returns the Lanczos2 weight for a sample at fSrcSampleOffset.
// The offset is first scaled by fKernelWeight; the scaled offset's length (types 0 and 1)
// or squared length (type 2) is then passed to the Lanczos2 variant selected by
// FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE.
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
{
    FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
    return fSampleWeight;
}

#if FFX_HALF
// Min16 overload of GetUpsampleLanczosWeight; same selection logic as the FP32 version.
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
    FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));

    // To Test: Save reciprocal sqrt compute
    // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
#else
#error "Invalid Lanczos type"
#endif
    return fSampleWeight;
}
#endif

// Returns the upper bound used for the kernel bias:
//   1 + (1 / DownscaleFactor().x - 1) * fKernelSizeBias, capped at 1.99.
FfxFloat32 ComputeMaxKernelWeight() {
    const FfxFloat32 fKernelSizeBias = 1.0f;

    FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias);

    return ffxMin(FfxFloat32(1.99f), fKernelWeight);
}

// Computes the upsampled color for the output pixel params.iPxHrPos and updates clippingBox.
//
// A 3x3 neighbourhood of prepared input colors is loaded around the source position. Each
// sample contributes to a Lanczos-weighted color sum and to the rectification box.
//
// params          - common accumulation-pass parameters; this function reads iPxHrPos,
//                   bIsNewSample, fDepthClipFactor and fHrVelocity.
// clippingBox     - in/out rectification box; receives every loaded sample and is finalized
//                   with RectificationBoxComputeVarianceBoxData before the result is used.
// fReactiveFactor - combined (max) with bIsNewSample to shrink the kernel bias.
//
// Returns xyz = weighted color and w = accumulated weight. When w exceeds FSR2_EPSILON the
// color is normalized by w, w is scaled by fUpsampleLanczosWeightScale and the color is
// clamped to clippingBox. Otherwise w is 0 and xyz is left un-normalized.
FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
{
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_begin.h"
#endif
    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);      // Destination resolution output pixel center position
    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();                   // Source resolution output pixel center position
    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos));                     // TODO: what about weird upscale factors...

#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    // Only 9 of the slots are written: indices are col + row * 4 for row, col in [0, 2].
    FfxFloat32x3 fSamples[iLanczos2SampleCount];

    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0

    FfxInt32x2 offsetTL;
    offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
    offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);

    //Load samples
    // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3].
    // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox.
    // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values.
    const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
    const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;

    FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);

    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {

        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iSampleIndex = col + (row << 2);

            FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;

            const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));

            fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
        }
    }

    FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);

    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);

    // Identify how much of each upsampled color to be used for this frame
    const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
    const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);

    const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
    const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
    const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);

    // Exponent scale for the rectification-box sample weight: -2 at zero velocity,
    // moving towards -3 as params.fHrVelocity approaches 50.
    const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));

    FFX_UNROLL
    for (FfxInt32 row = 0; row < 3; row++) {
        FFX_UNROLL
        for (FfxInt32 col = 0; col < 3; col++) {
            FfxInt32 iSampleIndex = col + (row << 2);

            // Same flipped column/row mapping as in the load loop above.
            const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
            const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
            FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;

            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;

            // Samples whose (unclamped) position is off screen get zero Lanczos weight.
            const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize())));
            FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));

            fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);

            // Update rectification box
            {
                const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
                const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);

                const FfxBoolean bInitialSample = (row == 0) && (col == 0);
                RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
            }
        }
    }

    RectificationBoxComputeVarianceBoxData(clippingBox);

    // Zero the weight when it does not exceed FSR2_EPSILON.
    fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON);

    if (fColorAndWeight.w > FSR2_EPSILON) {
        // Normalize for deringing (we need to compare colors)
        fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
        fColorAndWeight.w *= fUpsampleLanczosWeightScale;

        Deringing(clippingBox, fColorAndWeight.xyz);
    }

    // NOTE(review): ffx_fsr2_force16_end.h is included a second time here, with no matching
    // force16_begin since the previous end. Presumably harmless; confirm against that header.
#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
#include "ffx_fsr2_force16_end.h"
#endif

    return fColorAndWeight;
}

#endif //!defined( FFX_FSR2_UPSAMPLE_H )