CoCalc -- ffx_fsr2_compute_luminance

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
⁹⁹⁰⁰ views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
// Counter variable declared FFX_GROUPSHARED. It is handed to
// SPD_IncreaseAtomicCounter() by SpdIncreaseAtomicCounter() and its value is
// returned by SpdGetAtomicCounter().
FFX_GROUPSHARED FfxUInt32 spdCounter;
23

24
#ifndef SPD_PACKED_ONLY
25
// 16x16 FFX_GROUPSHARED scratch array: receives value.x in
// SpdStoreIntermediate() and supplies the first component in SpdLoadIntermediate().
FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
26
// 16x16 FFX_GROUPSHARED scratch array: receives value.y in
// SpdStoreIntermediate() and supplies the second component in SpdLoadIntermediate().
FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
27
// 16x16 FFX_GROUPSHARED scratch array: receives value.z in
// SpdStoreIntermediate() and supplies the third component in SpdLoadIntermediate().
FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
28
// 16x16 FFX_GROUPSHARED scratch array: receives value.w in
// SpdStoreIntermediate() and supplies the fourth component in SpdLoadIntermediate().
FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
29

30
// Loads one source sample for the luminance pyramid.
//
// tex   - pixel coordinate; it is offset by 0.5 and Jitter() and divided by
//         RenderSize() to form the UV that is sampled.
// slice - not used by this implementation.
//
// Returns a 4-component vector whose .x is log(max(FSR2_EPSILON, luma)) of the
// sampled colour divided by PreExposure(), or 0 when tex is not component-wise
// less than RenderSize(). The remaining components are always 0.
FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
{
    // Jittered pixel centre expressed as a UV, then passed through ClampUv().
    FfxFloat32x2 fSampleUv = (tex + 0.5f + Jitter()) / RenderSize();
    fSampleUv = ClampUv(fSampleUv, RenderSize(), InputColorResourceDimensions());

    // Sample the input colour and divide it by PreExposure().
    FfxFloat32x3 fColor = SampleInputColor(fSampleUv);
    fColor = fColor / PreExposure();

    // Log of the luma; the luma is floored at FSR2_EPSILON before taking the log.
    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fColor)));

    // Coordinates outside the render size must not contribute to the result.
    FfxFloat32 fOutput = 0.0f;
    if (all(FFX_LESS_THAN(tex, RenderSize())))
    {
        fOutput = fLogLuma;
    }

    return FfxFloat32x4(fOutput, 0, 0, 0);
}
46

47
// Returns the value SPD_LoadMipmap5() yields for coordinate tex.
//
// tex   - coordinate forwarded unchanged to SPD_LoadMipmap5().
// slice - not used by this implementation.
FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
{
    const FfxFloat32x4 fLoaded = SPD_LoadMipmap5(tex);
    return fLoaded;
}
51

52
// Stores one downsampled value and, on the last mip level, updates the
// exposure buffer.
//
// pix      - destination coordinate.
// outValue - value to store; only the .r component is used.
// index    - mip level being written.
// slice    - not used by this implementation.
//
// Behaviour:
//   * When index equals LumaMipLevelToUse() or 5, outValue.r is written with
//     SPD_SetMipmap().
//   * When index equals MipCount() - 1 and pix is (0, 0), the value is blended
//     with the .y component of SPD_LoadExposureBuffer() (only if that stored
//     value is below resetAutoExposureAverageSmoothing) and the pair
//     (ComputeAutoExposureFromLavg(result), result) is written with
//     SPD_SetExposureBuffer().
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
{
    if (index == LumaMipLevelToUse() || index == 5)
    {
        SPD_SetMipmap(pix, index, outValue.r);
    }

    // Accumulate on the 1x1 level: only the last mip and only pixel (0, 0).
    if ((index == MipCount() - 1) && all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
    {
        const FfxFloat32 fPreviousLavg = SPD_LoadExposureBuffer().y;
        FfxFloat32 fLavg = outValue.r;

        // The stored average is compared against resetAutoExposureAverageSmoothing;
        // smoothing is applied only while the stored value is below it.
        // NOTE(review): presumably a large sentinel marks a reset - confirm.
        if (fPreviousLavg < resetAutoExposureAverageSmoothing)
        {
            const FfxFloat32 fRate = 1.0f;
            const FfxFloat32 fBlend = 1 - exp(-DeltaTime() * fRate);
            fLavg = fPreviousLavg + (fLavg - fPreviousLavg) * fBlend;
        }

        const FfxFloat32x2 fExposureAndLavg = FfxFloat32x2(ComputeAutoExposureFromLavg(fLavg), fLavg);
        SPD_SetExposureBuffer(fExposureAndLavg);
    }
}
76

77
// Forwards spdCounter to SPD_IncreaseAtomicCounter().
//
// slice - not used by this implementation.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    // spdCounter is passed directly so the callee operates on the shared variable itself.
    SPD_IncreaseAtomicCounter(spdCounter);
}
81

82
// Returns the current value of spdCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    const FfxUInt32 uCounterValue = spdCounter;
    return uCounterValue;
}
86

87
// Delegates to SPD_ResetAtomicCounter().
//
// slice - not used by this implementation.
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    // No arguments are forwarded; the callee takes none here.
    SPD_ResetAtomicCounter();
}
91

92
// Reads back one 4-component value from the per-channel intermediate arrays.
//
// x, y - indices into the 16x16 spdIntermediate{R,G,B,A} arrays.
//
// Returns (R[x][y], G[x][y], B[x][y], A[x][y]).
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat32 fRed   = spdIntermediateR[x][y];
    const FfxFloat32 fGreen = spdIntermediateG[x][y];
    const FfxFloat32 fBlue  = spdIntermediateB[x][y];
    const FfxFloat32 fAlpha = spdIntermediateA[x][y];

    return FfxFloat32x4(fRed, fGreen, fBlue, fAlpha);
}
100
// Writes one 4-component value into the per-channel intermediate arrays.
//
// x, y  - indices into the 16x16 spdIntermediate{R,G,B,A} arrays.
// value - components x/y/z/w are stored to the R/G/B/A arrays respectively.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    const FfxFloat32 fRed   = value.x;
    const FfxFloat32 fGreen = value.y;
    const FfxFloat32 fBlue  = value.z;
    const FfxFloat32 fAlpha = value.w;

    spdIntermediateR[x][y] = fRed;
    spdIntermediateG[x][y] = fGreen;
    spdIntermediateB[x][y] = fBlue;
    spdIntermediateA[x][y] = fAlpha;
}
107
// Reduces four values to one by averaging them component-wise.
//
// v0..v3 - the four inputs.
//
// Returns (v0 + v1 + v2 + v3) * 0.25, summed left to right.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
    // Accumulate in the same left-to-right order as a single chained sum.
    FfxFloat32x4 fSum = v0 + v1;
    fSum = fSum + v2;
    fSum = fSum + v3;

    return fSum * 0.25f;
}
111
#endif
112

113
// Define the fetch and store functions for the packed (FFX_HALF) path.
114
#if FFX_HALF
115
#error Callback must be implemented
116

117
// 16x16 FFX_GROUPSHARED scratch array of 2-component values: receives value.xy
// in SpdStoreIntermediateH() and supplies the first two components in
// SpdLoadIntermediateH().
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
118
// 16x16 FFX_GROUPSHARED scratch array of 2-component values: receives value.zw
// in SpdStoreIntermediateH() and supplies the last two components in
// SpdLoadIntermediateH().
FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
119

120
// Packed-path source load: reads imgDst[0] at (tex, slice) and converts the
// result to FfxFloat16x4.
//
// tex   - 2D coordinate used for the first two index components.
// slice - used as the third index component.
FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
{
    const FfxFloat32x3 fCoord = FfxFloat32x3(tex, slice);
    return FfxFloat16x4(imgDst[0][fCoord]);
}
124
// Packed-path load: reads imgDst6 at (p, slice) and converts the result to
// FfxFloat16x4.
//
// p     - 2D coordinate used for the first two index components.
// slice - used as the third index component.
FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
{
    const FfxUInt32x3 uCoord = FfxUInt32x3(p, slice);
    return FfxFloat16x4(imgDst6[uCoord]);
}
128
// Packed-path store: writes value (converted to FfxFloat32x4) at (p, slice).
//
// p     - 2D destination coordinate.
// value - value to store.
// mip   - mip level being written.
// slice - used as the third index component.
//
// When mip equals LumaMipLevelToUse() or 5 the value goes to imgDst6;
// otherwise it goes to imgDst[mip + 1].
//
// NOTE(review): the enclosing FFX_HALF section starts with an #error, so this
// path is not compiled at present. Whether LumaMipLevelToUse() should really
// route to imgDst6 here needs confirming before the path is enabled.
void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
{
    // The level is tested through the `mip` parameter; this function declares
    // no `index`, so referring to it would not compile.
    if (mip == LumaMipLevelToUse() || mip == 5)
    {
        imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
        return;
    }
    imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
}
137
// Packed-path counter increment: calls InterlockedAdd() with the value 1 on
// rw_spd_global_atomic[(0, 0)].counter[slice], passing spdCounter as the
// third argument.
//
// slice - selects the counter element.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    const FfxInt16x2 iCounterCoord = FfxInt16x2(0, 0);
    InterlockedAdd(rw_spd_global_atomic[iCounterCoord].counter[slice], 1, spdCounter);
}
141
// Packed-path accessor: returns the current value of spdCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    const FfxUInt32 uCounterValue = spdCounter;
    return uCounterValue;
}
145
// Packed-path counter reset: writes 0 to
// rw_spd_global_atomic[(0, 0)].counter[slice].
//
// slice - selects the counter element.
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    const FfxInt16x2 iCounterCoord = FfxInt16x2(0, 0);
    rw_spd_global_atomic[iCounterCoord].counter[slice] = 0;
}
149
// Packed-path read of one 4-component value from the paired intermediate arrays.
//
// x, y - indices into the 16x16 spdIntermediateRG / spdIntermediateBA arrays.
//
// Returns (RG[x][y].x, RG[x][y].y, BA[x][y].x, BA[x][y].y).
FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat16x2 hRG = spdIntermediateRG[x][y];
    const FfxFloat16x2 hBA = spdIntermediateBA[x][y];

    return FfxFloat16x4(hRG.x, hRG.y, hBA.x, hBA.y);
}
157
// Packed-path write of one 4-component value into the paired intermediate arrays.
//
// x, y  - indices into the 16x16 spdIntermediateRG / spdIntermediateBA arrays.
// value - .xy is stored to spdIntermediateRG, .zw to spdIntermediateBA.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
{
    const FfxFloat16x2 hRG = value.xy;
    const FfxFloat16x2 hBA = value.zw;

    spdIntermediateRG[x][y] = hRG;
    spdIntermediateBA[x][y] = hBA;
}
162
// Packed-path reduction: averages four values component-wise.
//
// v0..v3 - the four inputs.
//
// Returns (v0 + v1 + v2 + v3) * FfxFloat16(0.25), summed left to right.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
    // Accumulate in the same left-to-right order as a single chained sum.
    FfxFloat16x4 hSum = v0 + v1;
    hSum = hSum + v2;
    hSum = hSum + v3;

    return hSum * FfxFloat16(0.25);
}
166
#endif
167

168
#include "ffx_spd.h"
169

170
// Entry point that runs the downsample pass from ffx_spd.h.
//
// WorkGroupId      - .xy is passed as the first argument and .z as the fifth.
// LocalThreadIndex - passed as the second argument.
//
// The mip count, work-group count and work-group offset come from MipCount(),
// NumWorkGroups() and WorkGroupOffset(). With FFX_HALF set the call goes to
// SpdDownsampleH(), otherwise to SpdDownsample(); both receive the same
// argument list.
void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
{
    const FfxUInt32x2 uGroupXY     = FfxUInt32x2(WorkGroupId.xy);
    const FfxUInt32   uThreadIndex = FfxUInt32(LocalThreadIndex);
    const FfxUInt32   uMipCount    = FfxUInt32(MipCount());
    const FfxUInt32   uGroupCount  = FfxUInt32(NumWorkGroups());
    const FfxUInt32   uGroupZ      = FfxUInt32(WorkGroupId.z);
    const FfxUInt32x2 uGroupOffset = FfxUInt32x2(WorkGroupOffset());

#if FFX_HALF
    SpdDownsampleH(uGroupXY, uThreadIndex, uMipCount, uGroupCount, uGroupZ, uGroupOffset);
#else
    SpdDownsample(uGroupXY, uThreadIndex, uMipCount, uGroupCount, uGroupZ, uGroupOffset);
#endif
}
190
Product

Resources

Company