CoCalc -- ffx_fsr2

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
9898 views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
#ifndef FFX_FSR2_UPSAMPLE_H
23
#define FFX_FSR2_UPSAMPLE_H
24

25
// Capacity of the fSamples scratch array declared in ComputeUpsampledColorAndWeight, which
// stores its samples at index col + (row << 2), i.e. with a row stride of 4.
FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16;
26

27
// Constrains a color to the extents of a rectification box.
//
// clippingBox : box whose aabbMin / aabbMax members supply the lower and upper bounds.
// fColor      : color to constrain; overwritten in place with the clamped value.
void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
{
    fColor = clamp(
        fColor,
        clippingBox.aabbMin,
        clippingBox.aabbMax);
}
31
#if FFX_HALF
// Overload of Deringing for the Min16 types; only compiled when FFX_HALF is enabled.
//
// clippingBox : Min16 rectification box whose aabbMin / aabbMax members supply the bounds.
// fColor      : color to constrain; overwritten in place with the clamped value.
void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
{
    fColor = clamp(
        fColor,
        clippingBox.aabbMin,
        clippingBox.aabbMax);
}
#endif // FFX_HALF
37

38
// Default Lanczos evaluation used by GetUpsampleLanczosWeight when the build has not
// already chosen one. Accepted values: 0 = reference, 1 = LUT, 2 = approximate.
#if !defined(FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE)
#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
#endif
41

42
// Evaluates the upsampling kernel weight for a single source sample.
//
// fSrcSampleOffset : 2D offset of the sample that the kernel is evaluated at.
// fKernelWeight    : scalar multiplied into both components of the offset before evaluation.
//
// Returns the weight produced by the Lanczos variant selected at compile time through
// FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE; any value other than 0, 1 or 2 is a compile error.
FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
{
    FfxFloat32x2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    return Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    return Lanczos2_UseLUT(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    // This variant is fed the squared length directly, so length() is not called.
    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
#else
#error "Invalid Lanczos type"
#endif
}
56

57
#if FFX_HALF
// Overload of GetUpsampleLanczosWeight for the Min16 types; only compiled when FFX_HALF is enabled.
//
// fSrcSampleOffset : 2D offset of the sample that the kernel is evaluated at.
// fKernelWeight    : scalar multiplied into both components of the offset before evaluation.
//
// Returns the weight produced by the Lanczos variant selected at compile time through
// FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE; any value other than 0, 1 or 2 is a compile error.
FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
{
    FFX_MIN16_F2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;
    FFX_MIN16_F fWeight;

#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
    fWeight = Lanczos2(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
    fWeight = Lanczos2_UseLUT(length(fScaledOffset));
#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
    // This variant is fed the squared length directly, so length() is not called.
    fWeight = Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));

    // Untested idea kept from the original author: a LUT indexed by squared distance
    // (Lanczos2Sq_UseLUT) could be tried here to save the reciprocal square root.
#else
#error "Invalid Lanczos type"
#endif

    return fWeight;
}
#endif // FFX_HALF
76

77
// Computes the upper limit used for the upsampling kernel weight.
//
// The result is 1 + (1 / DownscaleFactor().x - 1) * fKernelSizeBias, capped at 1.99.
// With the bias fixed at 1.0 this reduces to the reciprocal of the horizontal downscale factor.
FfxFloat32 ComputeMaxKernelWeight() {
    const FfxFloat32 fKernelSizeBias = 1.0f;
    const FfxFloat32 fWeightCeiling = FfxFloat32(1.99f);

    // Evaluated on both axes, but only the x component is consumed.
    const FfxFloat32x2 fInvDownscale = FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor());
    const FfxFloat32 fExcessX = (fInvDownscale - FfxFloat32(1)).x;

    const FfxFloat32 fUncappedWeight = FfxFloat32(1) + fExcessX * FfxFloat32(fKernelSizeBias);

    return ffxMin(fWeightCeiling, fUncappedWeight);
}
84

85
// Upsamples the input color around one output pixel and fills the rectification box
// from the same set of samples.
//
// params          : per-pixel accumulation parameters; this function reads iPxHrPos,
//                   bIsNewSample, fDepthClipFactor and fHrVelocity.
// clippingBox     : receives the nine loaded samples (via RectificationBoxAddSample), is
//                   finalized with RectificationBoxComputeVarianceBoxData, and is then used
//                   to clamp the returned color.
// fReactiveFactor : combined with params.bIsNewSample through a max; larger values lower the
//                   upper kernel bias and pull the blend towards the lower one.
//
// Returns a 4-vector: xyz is the weight-normalized color clamped to clippingBox, w is the
// accumulated sample weight multiplied by fUpsampleLanczosWeightScale. If the accumulated
// weight does not exceed FSR2_EPSILON, w is multiplied by zero and xyz is returned without
// normalization or clamping.
FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
{
    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
    #include "ffx_fsr2_force16_begin.h"
    #endif
    // We compute a sliced Lanczos filter with 2 lobes (other slices are accumulated temporally).
    // Center of the output pixel at destination resolution.
    FfxFloat32x2 fDstPixelCenter = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);
    // The same center expressed at source resolution.
    FfxFloat32x2 fSrcPixelCenter = fDstPixelCenter * DownscaleFactor();
    // TODO: what about weird upscale factors...
    FfxInt32x2 iSrcBaseTexel = FfxInt32x2(floor(fSrcPixelCenter));

    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
    #include "ffx_fsr2_force16_end.h"
    #endif

    FfxFloat32x3 fSamples[iLanczos2SampleCount];

    // Un-jittered position of the sample at offset (0, 0).
    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcBaseTexel) + FfxFloat32x2(0.5f, 0.5f)) - Jitter();

    // Per axis: when the un-jittered sample lies past the output position, the top-left offset
    // becomes -2 instead of -1 and the tap index is mirrored (3 - i). With the 3x3 loops below
    // both cases visit integer offsets -1..+1 around iSrcBaseTexel; the flip only reverses the
    // order in which the taps are stored in fSamples and accumulated.
    const FfxBoolean bMirrorCols = fSrcUnjitteredPos.x > fSrcPixelCenter.x;
    const FfxBoolean bMirrorRows = fSrcUnjitteredPos.y > fSrcPixelCenter.y;

    FfxInt32x2 iTopLeft;
    iTopLeft.x = bMirrorCols ? FfxInt32(-2) : FfxInt32(-1);
    iTopLeft.y = bMirrorRows ? FfxInt32(-2) : FfxInt32(-1);

    FfxFloat32x2 fTopLeft = FfxFloat32x2(iTopLeft);

    // Pass 1: load the 3x3 neighborhood into fSamples, using a row stride of 4.
    FFX_UNROLL
    for (FfxInt32 iRow = 0; iRow < 3; iRow++) {
        FFX_UNROLL
        for (FfxInt32 iCol = 0; iCol < 3; iCol++) {
            const FfxInt32 iSlot = iCol + (iRow * 4);

            FfxInt32x2 iTap = FfxInt32x2(bMirrorCols ? (3 - iCol) : iCol, bMirrorRows ? (3 - iRow) : iRow);
            FfxInt32x2 iTexel = FfxInt32x2(iSrcBaseTexel) + iTopLeft + iTap;

            // Coordinates are passed through ClampLoad with bounds (0, 0) and RenderSize() before loading.
            const FfxInt32x2 iLoadCoord = ClampLoad(iTexel, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));

            fSamples[iSlot] = LoadPreparedInputColor(FfxInt32x2(iLoadCoord));
        }
    }

    // Identify how much of each upsampled color is to be used for this frame.
    const FfxFloat32 fReactiveOrNew = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
    const FfxFloat32 fBiasUpper = ComputeMaxKernelWeight() * (1.0f - fReactiveOrNew);

    const FfxFloat32 fBiasLower = ffxMax(1.0f, ((1.0f + fBiasUpper) * 0.3f));
    const FfxFloat32 fBiasBlend = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fReactiveOrNew));
    const FfxFloat32 fKernelBias = ffxLerp(fBiasUpper, fBiasLower, fBiasBlend);

    // Exponent scale for the rectification box weights: -2 at zero velocity, reaching -3 once
    // params.fHrVelocity / 50 saturates.
    const FfxFloat32 fBoxFalloff = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));

    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcPixelCenter);
    FfxFloat32x4 fAccum = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);

    // Pass 2: weight every loaded sample and feed it to the rectification box.
    FFX_UNROLL
    for (FfxInt32 iRow = 0; iRow < 3; iRow++) {
        FFX_UNROLL
        for (FfxInt32 iCol = 0; iCol < 3; iCol++) {
            const FfxInt32 iSlot = iCol + (iRow * 4);

            const FfxInt32x2 iTap = FfxInt32x2(bMirrorCols ? (3 - iCol) : iCol, bMirrorRows ? (3 - iRow) : iRow);
            const FfxFloat32x2 fTapOffset = fTopLeft + FfxFloat32x2(iTap);
            FfxFloat32x2 fSampleOffset = fBaseSampleOffset + fTapOffset;

            FfxInt32x2 iTexel = FfxInt32x2(iSrcBaseTexel) + FfxInt32x2(iTopLeft) + iTap;

            // Taps for which IsOnScreen reports false get a weight of zero in the color sum.
            const FfxFloat32 fOnScreen = FfxFloat32(IsOnScreen(FfxInt32x2(iTexel), FfxInt32x2(RenderSize())));
            FfxFloat32 fTapWeight = fOnScreen * FfxFloat32(GetUpsampleLanczosWeight(fSampleOffset, fKernelBias));

            fAccum += FfxFloat32x4(fSamples[iSlot] * fTapWeight, fTapWeight);

            // Update the rectification box. Its weight does not include the on-screen factor.
            const FfxFloat32 fOffsetLenSq = dot(fSampleOffset, fSampleOffset);
            const FfxFloat32 fBoxWeight = exp(fBoxFalloff * fOffsetLenSq);

            // Slot 0 is reached only when both loop counters are zero.
            const FfxBoolean bFirstSample = (iSlot == 0);
            RectificationBoxAddSample(bFirstSample, clippingBox, fSamples[iSlot], fBoxWeight);
        }
    }

    RectificationBoxComputeVarianceBoxData(clippingBox);

    // Zero out weights that do not exceed the epsilon.
    fAccum.w *= FfxFloat32(fAccum.w > FSR2_EPSILON);

    if (fAccum.w > FSR2_EPSILON) {
        // Normalize before deringing, since the clamp compares colors against the box extents.
        fAccum.xyz = fAccum.xyz / fAccum.w;
        fAccum.w *= fUpsampleLanczosWeightScale;

        Deringing(clippingBox, fAccum.xyz);
    }

    // NOTE(review): this second force16_end include has no matching force16_begin visible in
    // this function -- confirm that it is intentional.
    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
    #include "ffx_fsr2_force16_end.h"
    #endif

    return fAccum;
}
193

194
#endif //!defined( FFX_FSR2_UPSAMPLE_H )
195

196
Product

Resources

Company