CoCalc -- ffx_fsr2_tcr

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
9900 views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
#define USE_YCOCG 1
23

24
#define fAutogenEpsilon 0.01f
25

26
// EXPERIMENTAL
27

28
// Binary transparency-and-composition estimate for a single pixel.
//
// The "alpha contribution" of a frame is the difference between the final input
// color and the opaque-only color. This function compares the per-channel magnitude
// of that contribution in the current frame (at uDispatchThreadId) with the one of
// the previous frame (at iPrevIdx). When USE_YCOCG is set the comparison is done on
// YCoCg-converted colors.
//
// Returns 0 when the saturated, channel-summed difference is below getTcThreshold(),
// and 1 otherwise.
FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    // Per-channel magnitude of what alpha blending added in each frame.
    FfxFloat32x3 alphaMagnitudeCurr = abs(colorPostAlpha - colorPreAlpha);
    FfxFloat32x3 alphaMagnitudePrev = abs(colorPrevPostAlpha - colorPrevPreAlpha);

    // Sum over the three channels of how much that magnitude changed between frames.
    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(alphaMagnitudeCurr - alphaMagnitudePrev), FfxFloat32x3(1, 1, 1))));

    // Binarize: values below the threshold are dropped, everything else becomes fully set.
    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);

    return retVal;
}
59

60
// works ok: thin edges
61
// Continuous transparency-and-composition estimate for a single pixel.
//
// Loads the opaque-only and final colors of the current frame (at uDispatchThreadId)
// and of the previous frame (at iPrevIdx), optionally converted to YCoCg. If neither
// frame shows an alpha contribution above fAutogenEpsilon in any channel the result
// is 0. Otherwise the frame-to-frame change of the final color is divided, per
// channel, by the frame-to-frame change of the opaque-only color; the largest channel
// ratio is scaled by the length of the final-color change and saturated.
//
// Returns a value in [0, 1].
FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);

#if USE_YCOCG
    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
#endif

    FfxFloat32x3 epsilon3 = FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon);

    // A frame "has alpha" when final and opaque-only color differ noticeably in any channel.
    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorPostAlpha - colorPreAlpha), epsilon3));
    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevPostAlpha - colorPrevPreAlpha), epsilon3));

    // Frame-to-frame change of the opaque-only and of the final color.
    FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;
    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;

    // The denominator is clamped from below by epsilon, so channels whose opaque-only
    // change is negative or tiny divide by fAutogenEpsilon instead.
    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(epsilon3, N) : FfxFloat32x3(0, 0, 0);

    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );

    // only pixels that have significantly changed in color should be considered
    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(NminusNA)) );

    return retVal;
}
97

98
// This function computes the TransparencyAndComposition mask:
99
// This mask indicates pixels that should discard locks and apply color clamping.
100
// 
101
// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of 
102
// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
103
// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
104
//
105
// This function takes an opaque-only and a final texture and uses internal copies of those textures from the last frame.
106
// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
107
// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
108
// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
109
// 
110
// In the final step it stores the current textures in internal textures for the next frame
111

112
// Two-stage mask evaluation: ComputeAutoTC_02 is evaluated first, and only pixels for
// which it reports more than 0.01 are re-evaluated with ComputeAutoTC_01, whose result
// then replaces the first one. Pixels at or below 0.01 keep the ComputeAutoTC_02 value.
FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    FFX_MIN16_F fFirstPass = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);

    // [branch]
    if (fFirstPass > FFX_MIN16_F(0.01f))
    {
        return ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
    }

    return fFirstPass;
}
123

124
// Edge measure on the opaque-only image.
//
// For every tap of the 3x3 neighbourhood the length of the color difference between
// the current opaque-only color (around curPos) and the previous pre-alpha color
// (around prevPos) is stored. The result is the fourth root of the product of the
// four absolute differences between the centre tap and its left, right, top and
// bottom neighbours.
float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 grid: index = (y + 1) * 3 + (x + 1), so index 4 is the centre tap.
    float fTemporalDiff[9];
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FFX_MIN16_I2 iOffset = FFX_MIN16_I2(x, y);
            FfxFloat32x3 curCol  = LoadOpaqueOnly(curPos + iOffset).rgb;
            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + iOffset).rgb;
            fTemporalDiff[(y + 1) * 3 + (x + 1)] = length(curCol - prevCol);
        }
    }

    float fCentre = fTemporalDiff[4];

    // Multiplicative gradients: each is zero as soon as one side matches the centre.
    // (An additive variant combined as sqrt(gradX * gradX + gradY * gradY) is not used.)
    float gradX = abs(fTemporalDiff[3] - fCentre) * abs(fTemporalDiff[5] - fCentre);
    float gradY = abs(fTemporalDiff[1] - fCentre) * abs(fTemporalDiff[7] - fCentre);

    float fProduct = gradX * gradY;
    return sqrt(sqrt(fProduct));
}
148

149
// Edge measure on the alpha contribution.
//
// For every tap of the 3x3 neighbourhood the absolute difference between final input
// color and opaque-only color is computed for the current frame (around curPos) and
// between previous post-alpha and pre-alpha color for the previous frame (around
// prevPos); the length of the difference of those two vectors is stored. The result
// is the fourth root of the product of the four absolute differences between the
// centre tap and its left, right, top and bottom neighbours.
float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
{
    // Row-major 3x3 grid: index = (y + 1) * 3 + (x + 1), so index 4 is the centre tap.
    float fAlphaDiff[9];
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FFX_MIN16_I2 iOffset = FFX_MIN16_I2(x, y);
            FfxFloat32x3 curCol  = abs(LoadInputColor(curPos + iOffset).rgb - LoadOpaqueOnly(curPos + iOffset).rgb);
            FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + iOffset).rgb - LoadPrevPreAlpha(prevPos + iOffset).rgb);
            fAlphaDiff[(y + 1) * 3 + (x + 1)] = length(curCol - prevCol);
        }
    }

    float fCentre = fAlphaDiff[4];

    // Multiplicative gradients: each is zero as soon as one side matches the centre.
    // (An additive variant combined as sqrt(gradX * gradX + gradY * gradY) is not used.)
    float gradX = abs(fAlphaDiff[3] - fCentre) * abs(fAlphaDiff[5] - fCentre);
    float gradY = abs(fAlphaDiff[1] - fCentre) * abs(fAlphaDiff[7] - fCentre);

    float fProduct = gradX * gradY;
    return sqrt(sqrt(fProduct));
}
173

174
// Measures how much of the current 3x3 color neighbourhood falls outside the color
// bounding box of the previous 3x3 neighbourhood.
//
// A per-channel min/max box is built from the previous post-alpha colors around
// iPrevIdx. Then every channel of every current input color around uDispatchThreadId
// is tested against that box. When USE_YCOCG is set both sides are converted to YCoCg
// before the comparison.
//
// Returns the fraction of the 27 channel samples (9 taps x 3 channels) that lie
// outside the box, i.e. a value in [0, 1].
FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // Seed the box inverted so the first sample initialises both bounds.
    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            W = RGBToYCoCg(W);
#endif
            minPrev = min(minPrev, W);
            maxPrev = max(maxPrev, W);
        }
    }

    // instead of computing the overlap: simply count how many samples are outside
    // set reactive based on that
    FFX_MIN16_F count = FFX_MIN16_F(0.f);
    for (int y = -1; y < 2; ++y)
    {
        for (int x = -1; x < 2; ++x)
        {
            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));

#if USE_YCOCG
            Y = RGBToYCoCg(Y);
#endif
            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
        }
    }

    // 9 taps times 3 channels were tested.
    return count / FFX_MIN16_F(27.f);
}
226

227

228
// This function computes the Reactive mask:
229
// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
230
// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
231
// As a result history would not be trustworthy.
232
// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
233
// For mirrors we may assume the pre-alpha is pretty uniform color.
234
// 
235
// This works well generally, but also marks edge pixels
236
// Reactive value for one pixel: the alpha-contribution edge measure minus the
// opaque-only edge measure, saturated to [0, 1].
FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
{
    // we only get here if alpha has a significant contribution and has changed since last frame.

    // mark pixels with huge variance in alpha as reactive
    FFX_MIN16_F fAlphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
    FFX_MIN16_F fOpaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));

    // NOTE: the subtraction also marks edge pixels due to jitter; those would need to
    // be cancelled out, which this function does not do.
    return ffxSaturate(fAlphaEdge - fOpaqueEdge);
}
251

252
Product

Resources

Company