CoCalc -- ffx_fsr2_depth

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
⁹⁸⁹⁹ views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
#ifndef FFX_FSR2_DEPTH_CLIP_H
23
#define FFX_FSR2_DEPTH_CLIP_H
24

25
// File-scope constant (4.0).
// NOTE(review): not referenced by any function in this header; presumably
// consumed by a shader that includes it -- confirm before removing.
FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;
26

27
// Computes a depth-clip factor in [0, 1] for the sample at fUvSample.
//
// The four bilinear taps around fUvSample (as described by
// GetBilinearSamplingData) are compared against the reconstructed
// previous-frame depth. A tap contributes only when it is on screen, its
// bilinear weight exceeds fReconstructedDepthBilinearWeightThreshold, and the
// current view-space depth is greater than the previous view-space depth at
// that tap.
//
// fUvSample            UV at which the previous depth is examined.
// fCurrentDepthSample  Raw (non view-space) depth of the current sample.
//
// Returns saturate(1 - weighted average of the per-tap terms), or 0 when no
// tap contributed.
FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
{
    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());

    // Terms that do not depend on the tap being processed are evaluated once.
    // fRenderSizeLength is the length of the render-size vector (its diagonal).
    const FfxFloat32 Ksep = 1.37e-05f;
    const FfxFloat32 fRenderSizeLength = length(FfxFloat32x2(RenderSize()));

    // The exponent ramps from 1 to 3 as the render-size diagonal approaches
    // that of 1920x1080 (the ratio is saturated, so it never exceeds 3).
    const FfxFloat32 fResolutionFactor = ffxSaturate(fRenderSizeLength / length(FfxFloat32x2(1920.0f, 1080.0f)));
    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);

    FfxFloat32 fDepth = 0.0f;
    FfxFloat32 fWeightSum = 0.0f;
    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {

        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;

        if (IsOnScreen(iSamplePos, RenderSize())) {
            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {

                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);

                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;

                // fDepthDiff is used as a divisor below, so it must be strictly positive.
                if (fDepthDiff > 0.0f) {

                    // Select one of the two raw depth samples; which one depends
                    // on the depth convention the shader was compiled for.
#if FFX_FSR2_OPTION_INVERTED_DEPTH
                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
#else
                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
#endif

                    // View-space positions of the screen centre and of pixel (0, 0)
                    // at the selected depth; their length ratio is the Kfov term.
                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);

                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);

                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fRenderSizeLength * fDepthThreshold;

                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
                    fWeightSum += fWeight;
                }
            }
        }
    }

    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
}
78

79
// Measures how much the input motion vectors in the 3x3 neighbourhood of
// iPxPos disagree with the motion vector at iPxPos itself.
//
// iPxPos                    Pixel whose neighbourhood is inspected.
// iPxInputMotionVectorSize  Extent passed to ClampLoad for the neighbour taps.
//
// Returns saturate(1 - lowest alignment) * saturate(max velocity / 0.01).
// The neighbourhood is skipped (alignment stays at 1, so the result is 0)
// when the centre vector, scaled by RenderSize(), is not longer than 1e-02.
FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
{
    const FfxFloat32 kVelocityEpsilon = 1e-02f;

    const FfxFloat32x2 fCenterVector = LoadInputMotionVector(iPxPos);
    const FfxFloat32 fCenterVelocityLr = length(fCenterVector * RenderSize());

    FfxFloat32 fLowestAlignment = 1.0f;
    FfxFloat32 fMaxVelocityUv = length(fCenterVector);

    if (fCenterVelocityLr > kVelocityEpsilon) {
        for (FfxInt32 iRow = -1; iRow < 2; iRow++) {
            for (FfxInt32 iCol = -1; iCol < 2; iCol++) {

                const FfxInt32x2 iTapPos = ClampLoad(iPxPos, FfxInt32x2(iCol, iRow), iPxInputMotionVectorSize);
                const FfxFloat32x2 fTapVector = LoadInputMotionVector(iTapPos);

                // Both vectors are divided by the running maximum length seen so
                // far (centre included), not by their own lengths.
                FfxFloat32 fNormalizer = length(fTapVector);
                fMaxVelocityUv = ffxMax(fNormalizer, fMaxVelocityUv);
                fNormalizer = ffxMax(fNormalizer, fMaxVelocityUv);

                const FfxFloat32 fAlignment = dot(fTapVector / fNormalizer, fCenterVector / fNormalizer);
                fLowestAlignment = ffxMin(fLowestAlignment, fAlignment);
            }
        }
    }

    const FfxFloat32 fDisagreement = ffxSaturate(1.0f - fLowestAlignment);
    const FfxFloat32 fVelocityGate = ffxSaturate(fMaxVelocityUv / 0.01f);
    return fDisagreement * fVelocityGate;
}
107

108
// Computes the relative spread of dilated depth over the 3x3 neighbourhood
// of iPxPos.
//
// Each tap's depth is GetViewSpaceDepthInMeters(LoadDilatedDepth(pos)),
// multiplied by 0 when the tap is off screen (so off-screen taps count as 0).
//
// Returns 1 - min / max over the nine taps, forced to 0 when any tap equals
// GetMaxDistanceInMeters().
FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32 fFarLimit = GetMaxDistanceInMeters();

    FfxFloat32 fSmallestDepth = fFarLimit;
    FfxFloat32 fLargestDepth = 0.0f;
    FfxBoolean bFarLimitSeen = false;

    for (FfxInt32 iRow = -1; iRow <= 1; ++iRow) {
        for (FfxInt32 iCol = -1; iCol <= 1; ++iCol) {

            const FfxInt32x2 iTapPos = iPxPos + FfxInt32x2(iCol, iRow);

            // The depth is loaded unconditionally and masked afterwards.
            const FfxFloat32 fVisibility = IsOnScreen(iTapPos, RenderSize()) ? 1.0f : 0.0f;
            const FfxFloat32 fTapDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iTapPos)) * fVisibility;

            bFarLimitSeen = bFarLimitSeen || (fFarLimit == fTapDepth);

            fSmallestDepth = ffxMin(fSmallestDepth, fTapDepth);
            fLargestDepth = ffxMax(fLargestDepth, fTapDepth);
        }
    }

    const FfxFloat32 fSpread = 1.0f - fSmallestDepth / fLargestDepth;
    const FfxFloat32 fFarLimitMask = bFarLimitSeen ? 0.0f : 1.0f;
    return fSpread * fFarLimitMask;
}
134

135
// Compares the dilated motion vector at iPxPos with the previous frame's
// dilated motion vector sampled at the reprojected location.
//
// iPxPos  Pixel position; its centre (+0.5) divided by RenderSize() gives
//         the UV that is reprojected.
//
// Returns 0 unless the motion vector scaled by DisplaySize() is longer than
// 1. Otherwise returns (1 - saturate(|previous| / |current|)) blended in by
// saturate((distance / 20)^3).
FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
{
    const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();

    const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
    const FfxFloat32x2 fReprojectedUv = ClampUv(fUv + fMotionVector, RenderSize(), MaxRenderSize());
    const FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);

    const FfxFloat32 fPxDistance = length(fMotionVector * DisplaySize());

    FfxFloat32 fDivergence = 0.0f;

    // The guard also keeps length(fMotionVector) non-zero for the division.
    if (fPxDistance > 1.0f) {
        const FfxFloat32 fLengthLoss = 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector));
        const FfxFloat32 fBlend = ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f));
        fDivergence = ffxLerp(0.0f, fLengthLoss, fBlend);
    }

    return fDivergence;
}
147

148
// Builds the dilated reactive masks for iPxLrPos and stores them via
// StoreDilatedReactiveMasks.
//
// The stored value has two channels: x starts at 0 and y starts at
// fMotionDivergence. When the reactive and transparency-and-composition
// masks sum to more than 0 over the 3x3 neighbourhood, each channel is raised
// to the largest tap value, where every tap is first raised to a power
// between 1 and 7 that grows as the tap colour differs from the centre colour.
//
// iPxLrPos           Pixel position; neighbours are clamped with ClampLoad
//                    against RenderSize().
// fMotionDivergence  Initial value of the second (y) channel.
void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
{
    // Compensate for bilinear sampling in accumulation pass

    const FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);

    FfxFloat32 fMasksSum = 0.0f;

    FfxFloat32x3 fColorSamples[9];
    FfxFloat32 fReactiveSamples[9];
    FfxFloat32 fTransparencyAndCompositionSamples[9];

    // Gather pass: fetch all nine taps and accumulate the mask total.
    FFX_UNROLL
    for (FfxInt32 y = -1; y < 2; y++) {
        FFX_UNROLL
        for (FfxInt32 x = -1; x < 2; x++) {

            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));

            const FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;

            const FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
            const FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
            const FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);

            fColorSamples[sampleIdx] = fColorSample;
            fReactiveSamples[sampleIdx] = fReactiveSample;
            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;

            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
        }
    }

    // The weighting pass is skipped entirely when the mask total is not positive.
    if (fMasksSum > 0.0f)
    {
        // Invariant across taps, so evaluated once.
        const FfxFloat32 fReferenceLenSq = dot(fReferenceColor, fReferenceColor);
        const FfxFloat32 fPowerBiasMax = 6.0f;

        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
        {
            const FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
            const FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
            const FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];

            const FfxFloat32 fMaxLenSq = ffxMax(fReferenceLenSq, dot(fColorSample, fColorSample));

            // fMaxLenSq is 0 only when both colours are exactly zero. They are
            // then identical, so treat them as fully similar instead of
            // evaluating 0 / 0 and feeding NaN into ffxPow / ffxMax.
            const FfxFloat32 fSimilarity = (fMaxLenSq > 0.0f) ? (dot(fReferenceColor, fColorSample) / fMaxLenSq) : 1.0f;

            // Increase power for non-similar samples
            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);

            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
        }
    }

    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
}
205

206
// Loads the input colour at iPxLrPos, clamps negative components to zero,
// runs it through PrepareRgb with Exposure() and PreExposure(), and returns
// the result converted with RGBToYCoCg.
//
// The input is assumed to be linear. Non-linear input (sRGB, ...) should be
// converted to linear first and converted back to sRGB on output.
FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
{
    const FfxFloat32x3 fNonNegativeRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fNonNegativeRgb, Exposure(), PreExposure());

    return RGBToYCoCg(fPreparedRgb);
}
218

219
// Inspects the reconstructed previous depth at iPxPos and at its two
// neighbours along y (y - 1 and y + 1), all converted to view space.
//
// Returns 0 when the depth drops by more than 1% twice in a row, from y - 1
// to the centre and from the centre to y + 1 (each drop measured against the
// lower-row value of the pair). Returns 1 otherwise.
//
// fMotionVector is accepted but not read by this function.
FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
{
    const FfxFloat32 fDepthYMinus = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
    const FfxFloat32 fDepthCenter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
    const FfxFloat32 fDepthYPlus = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));

    const FfxBoolean bDropIntoCenter = (fDepthYMinus - fDepthCenter) > (fDepthCenter * 0.01f);
    const FfxBoolean bDropPastCenter = (fDepthCenter - fDepthYPlus) > (fDepthYPlus * 0.01f);

    return (bDropIntoCenter && bDropPastCenter) ? 0.0f : 1.0f;
}
227

228
// Per-pixel entry point of this header. For iPxPos it:
//   1. stores the prepared input colour (YCoCg) together with the depth-clip
//      factor via StorePreparedInputColor, and
//   2. computes the motion / temporal divergence terms and hands the larger
//      of the two to PreProcessReactiveMasks, which stores the dilated
//      reactive masks.
void DepthClip(FfxInt32x2 iPxPos)
{
    const FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);

    // Discard tiny mvs
    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);

    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);

    // Compute prepared input color and depth clip
    const FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
    const FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));

    // Compute dilated reactive mask
    // NOTE(review): RenderSize() is passed as the motion-vector extent in both
    // configurations, including when iSamplePos comes from
    // ComputeHrPosFromLrPos -- confirm this is the intended clamp range.
#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
    FfxInt32x2 iSamplePos = iPxPos;
#else
    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
#endif

    const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
    const FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));

    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
}
257

258
#endif // FFX_FSR2_DEPTH_CLIP_H
259
Product

Resources

Company