Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
9917 views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21
22
#ifndef FFX_FSR2_SAMPLE_H
23
#define FFX_FSR2_SAMPLE_H
24
25
// suppress warnings
26
#ifdef FFX_HLSL
27
#pragma warning(disable: 4008) // potentially divide by zero
28
#endif //FFX_HLSL
29
30
// 2x2 texel neighbourhood consumed by Bilinear().
// Field suffix is <x><y>: the bilinear fetch macro in this file loads fColorXY from texel offset (+X, +Y).
struct FetchedBilinearSamples
{
    // y offset +0
    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;

    // y offset +1
    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
};
38
39
// 4x4 texel neighbourhood consumed by the Lanczos2* filters.
// Field suffix is <x><y>: the bicubic fetch macro in this file loads fColorXY from texel offset (X - 1, Y - 1),
// so fColor11 is the base texel.
struct FetchedBicubicSamples
{
    // y offset -1
    FfxFloat32x4 fColor00;
    FfxFloat32x4 fColor10;
    FfxFloat32x4 fColor20;
    FfxFloat32x4 fColor30;

    // y offset +0
    FfxFloat32x4 fColor01;
    FfxFloat32x4 fColor11;
    FfxFloat32x4 fColor21;
    FfxFloat32x4 fColor31;

    // y offset +1
    FfxFloat32x4 fColor02;
    FfxFloat32x4 fColor12;
    FfxFloat32x4 fColor22;
    FfxFloat32x4 fColor32;

    // y offset +2
    FfxFloat32x4 fColor03;
    FfxFloat32x4 fColor13;
    FfxFloat32x4 fColor23;
    FfxFloat32x4 fColor33;
};
61
62
#if FFX_HALF
63
// FFX_MIN16_F4 counterpart of FetchedBilinearSamples (same field names and order).
struct FetchedBilinearSamplesMin16
{
    // y offset +0
    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;

    // y offset +1
    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
};
71
72
// FFX_MIN16_F4 counterpart of FetchedBicubicSamples (same field names and order).
struct FetchedBicubicSamplesMin16
{
    // y offset -1
    FFX_MIN16_F4 fColor00;
    FFX_MIN16_F4 fColor10;
    FFX_MIN16_F4 fColor20;
    FFX_MIN16_F4 fColor30;

    // y offset +0
    FFX_MIN16_F4 fColor01;
    FFX_MIN16_F4 fColor11;
    FFX_MIN16_F4 fColor21;
    FFX_MIN16_F4 fColor31;

    // y offset +1
    FFX_MIN16_F4 fColor02;
    FFX_MIN16_F4 fColor12;
    FFX_MIN16_F4 fColor22;
    FFX_MIN16_F4 fColor32;

    // y offset +2
    FFX_MIN16_F4 fColor03;
    FFX_MIN16_F4 fColor13;
    FFX_MIN16_F4 fColor23;
    FFX_MIN16_F4 fColor33;
};
94
#else //FFX_HALF
95
// Without FFX_HALF the Min16 sample structs are just aliases of the full-precision ones.
#define FetchedBicubicSamplesMin16  FetchedBicubicSamples
#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
97
#endif //FFX_HALF
98
99
// Linear interpolation: returns A at t = 0 and B at t = 1.
FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
{
    FfxFloat32x4 fDelta = B - A;
    return A + fDelta * t;
}
103
104
// Bilinear blend of a 2x2 neighbourhood: interpolate each row along x with fPxFrac.x,
// then blend the two row results along y with fPxFrac.y.
FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
{
    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);

    return Linear(fRow0, fRow1, fPxFrac.y);
}
111
112
#if FFX_HALF
113
// FFX_MIN16 overload of Linear(): returns A at t = 0 and B at t = 1.
FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
{
    FFX_MIN16_F4 fDelta = B - A;
    return A + fDelta * t;
}
117
118
// FFX_MIN16 overload of Bilinear(): interpolate each row along x, then blend the rows along y.
FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
{
    FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);

    return Linear(fRow0, fRow1, fPxFrac.y);
}
125
#endif
126
127
// Evaluates sinc(x) * sinc(x / 2), with sinc(v) = sin(PI * v) / (PI * v), without clamping x.
// Returns 1 when |x| is below FSR2_EPSILON so the division by zero at the origin is avoided.
FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
{
    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants

    if (abs(x) < FSR2_EPSILON)
    {
        return 1.f;
    }

    FfxFloat32 fSincFull = sin(PI * x) / (PI * x);
    FfxFloat32 fSincHalf = sin(0.5f * PI * x) / (0.5f * PI * x);
    return fSincFull * fSincHalf;
}
132
133
// Lanczos2 weight for distance x; |x| is limited to 2 before evaluating the kernel.
FfxFloat32 Lanczos2(FfxFloat32 x)
{
    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
}
138
139
#if FFX_HALF
140
141
#if 0
142
// FFX_MIN16 variant of Lanczos2NoClamp(). Currently compiled out by the enclosing `#if 0`.
FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
{
    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants

    if (abs(x) < FFX_MIN16_F(FSR2_EPSILON))
    {
        return FFX_MIN16_F(1.f);
    }

    FFX_MIN16_F fSincFull = sin(PI * x) / (PI * x);
    FFX_MIN16_F fSincHalf = sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x);
    return fSincFull * fSincHalf;
}
147
#endif
148
149
// FFX_MIN16 overload of Lanczos2(): limits |x| to 2, evaluates Lanczos2NoClamp() and
// converts the result back to FFX_MIN16_F.
FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
{
    FFX_MIN16_F fClamped = ffxMin(abs(x), FFX_MIN16_F(2.0f));
    return FFX_MIN16_F(Lanczos2NoClamp(fClamped));
}
154
#endif //FFX_HALF
155
156
// FSR1 Lanczos approximation. The argument is the squared distance (x * x) and must be <= 4.
// Computes (25/16 * (2/5 * x2 - 1)^2 - (25/16 - 1)) * (1/4 * x2 - 1)^2.
FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
{
    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;

    FfxFloat32 fFactorA = (25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1);
    return fFactorA * (fTermB * fTermB);
}
163
164
#if FFX_HALF
165
// FFX_MIN16 overload of Lanczos2ApproxSqNoClamp(); x2 is the squared distance.
FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
{
    FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
    FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);

    FFX_MIN16_F fFactorA = FFX_MIN16_F(25.0f / 16.0f) * fTermA * fTermA - FFX_MIN16_F(25.0f / 16.0f - 1);
    return fFactorA * (fTermB * fTermB);
}
171
#endif //FFX_HALF
172
173
// Approximate Lanczos2 weight for a squared distance; x2 is limited to 4 first.
FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
}
178
179
#if FFX_HALF
180
// FFX_MIN16 overload of Lanczos2ApproxSq(); x2 is limited to 4 first.
FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
{
    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
}
185
#endif //FFX_HALF
186
187
// Approximate Lanczos2 weight for distance x; squares x and applies no clamping.
FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
{
    FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSqNoClamp(fSquared);
}
191
192
#if FFX_HALF
193
// FFX_MIN16 overload of Lanczos2ApproxNoClamp(); squares x and applies no clamping.
FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
{
    FFX_MIN16_F fSquared = x * x;
    return Lanczos2ApproxSqNoClamp(fSquared);
}
197
#endif //FFX_HALF
198
199
// Approximate Lanczos2 weight for distance x; the squared distance is clamped by Lanczos2ApproxSq().
FfxFloat32 Lanczos2Approx(FfxFloat32 x)
{
    FfxFloat32 fSquared = x * x;
    return Lanczos2ApproxSq(fSquared);
}
203
204
#if FFX_HALF
205
// FFX_MIN16 overload of Lanczos2Approx(); the squared distance is clamped by Lanczos2ApproxSq().
FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
{
    FFX_MIN16_F fSquared = x * x;
    return Lanczos2ApproxSq(fSquared);
}
209
#endif //FFX_HALF
210
211
// Looks up the weight for distance |x| through SampleLanczos2Weight() (declared outside this file).
FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
{
    FfxFloat32 fDistance = abs(x);
    return SampleLanczos2Weight(fDistance);
}
215
216
#if FFX_HALF
217
// FFX_MIN16 overload: looks up the weight for |x| through SampleLanczos2Weight() and converts it to FFX_MIN16_F.
FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
{
    FFX_MIN16_F fDistance = abs(x);
    return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
}
221
#endif //FFX_HALF
222
223
// 4-tap filter using LUT weights. The taps sit at positions -1, 0, +1 and +2; t is the sample
// position relative to tap 0. The weighted sum is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
    FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
    FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
    FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
231
#if FFX_HALF
232
// FFX_MIN16 overload of the 4-tap LUT filter. Taps sit at positions -1, 0, +1 and +2 relative to t;
// the weighted sum is normalised by the sum of the weights.
FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
240
#endif
241
242
// 4-tap Lanczos2 filter. The taps sit at positions -1, 0, +1 and +2; t is the sample position
// relative to tap 0. The weighted sum is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2(-1.f - t);
    FfxFloat32 fW1 = Lanczos2(-0.f - t);
    FfxFloat32 fW2 = Lanczos2(+1.f - t);
    FfxFloat32 fW3 = Lanczos2(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
250
251
// Separable Lanczos2 filter over a 4x4 neighbourhood: each row is filtered along x with fPxFrac.x,
// then the four row results are filtered along y with fPxFrac.y. The result is finally clamped
// (deringing) to the per-component range of the four samples fColor11, fColor21, fColor12, fColor22.
FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLower = Samples.fColor11;
    FfxFloat32x4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
284
285
#if FFX_HALF
286
// FFX_MIN16 overload of the 4-tap Lanczos2 filter. Taps sit at positions -1, 0, +1 and +2 relative to t;
// the weighted sum is normalised by the sum of the weights.
FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
294
295
// FFX_MIN16 overload of the separable 4x4 Lanczos2 filter: rows are filtered along x with fPxFrac.x,
// the row results along y with fPxFrac.y, and the result is clamped (deringing) to the per-component
// range of fColor11, fColor21, fColor12 and fColor22.
FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLower = Samples.fColor11;
    FFX_MIN16_F4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
328
#endif //FFX_HALF
329
330
331
// Separable 4x4 filter using the LUT-based weights (Lanczos2_UseLUT): rows are filtered along x with
// fPxFrac.x, the row results along y with fPxFrac.y, and the result is clamped (deringing) to the
// per-component range of fColor11, fColor21, fColor12 and fColor22.
FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLower = Samples.fColor11;
    FfxFloat32x4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
364
365
#if FFX_HALF
366
// FFX_MIN16 overload of Lanczos2LUT(): separable 4x4 filter with LUT-based weights, followed by a
// deringing clamp to the per-component range of fColor11, fColor21, fColor12 and fColor22.
FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLower = Samples.fColor11;
    FFX_MIN16_F4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
399
#endif //FFX_HALF
400
401
402
403
// 4-tap filter using the approximate, unclamped Lanczos2 weights. Taps sit at positions -1, 0, +1
// and +2 relative to t; the weighted sum is normalised by the sum of the weights.
FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
{
    FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
    FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
    FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
    FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);

    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FfxFloat32   fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
411
412
#if FFX_HALF
413
// FFX_MIN16 overload of the 4-tap approximate Lanczos2 filter. Taps sit at positions -1, 0, +1 and +2
// relative to t; the weighted sum is normalised by the sum of the weights.
FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
{
    FFX_MIN16_F fW0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
    FFX_MIN16_F fW1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
    FFX_MIN16_F fW2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
    FFX_MIN16_F fW3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);

    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
    FFX_MIN16_F  fWeightTotal = fW0 + fW1 + fW2 + fW3;
    return fWeightedSum / fWeightTotal;
}
421
#endif //FFX_HALF
422
423
// Separable 4x4 filter using the approximate Lanczos2 weights: rows are filtered along x with
// fPxFrac.x, the row results along y with fPxFrac.y, and the result is clamped (deringing) to the
// per-component range of fColor11, fColor21, fColor12 and fColor22.
FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FfxFloat32x4 fLower = Samples.fColor11;
    FfxFloat32x4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
456
457
#if FFX_HALF
458
// FFX_MIN16 overload of the separable 4x4 approximate Lanczos2 filter, followed by a deringing clamp
// to the per-component range of fColor11, fColor21, fColor12 and fColor22.
FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
{
    // Horizontal pass, one result per row.
    FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
    FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
    FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
    FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);

    // Vertical pass across the row results.
    FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);

    // Deringing: bound the result by the min/max of the central 2x2 samples.
    // TODO: only use 4 by checking jitter
    FFX_MIN16_F4 fLower = Samples.fColor11;
    FFX_MIN16_F4 fUpper = Samples.fColor11;

    fLower = ffxMin(fLower, Samples.fColor21);
    fUpper = ffxMax(fUpper, Samples.fColor21);

    fLower = ffxMin(fLower, Samples.fColor12);
    fUpper = ffxMax(fUpper, Samples.fColor12);

    fLower = ffxMin(fLower, Samples.fColor22);
    fUpper = ffxMax(fUpper, Samples.fColor22);

    return clamp(fFiltered, fLower, fUpper);
}
491
#endif
492
493
// Returns iPxSample + iPxOffset, clamped only on the side the offset points to:
// a negative offset is clamped against 0, a positive one against iTextureSize - 1,
// and a zero offset is passed through unclamped.
// Assumes iPxSample is already in range and iPxOffset is a compile time constant.
FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
{
    FfxInt32x2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x < 0)
    {
        iCoord.x = ffxMax(iCoord.x, 0);
    }
    else if (iPxOffset.x > 0)
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
    }

    if (iPxOffset.y < 0)
    {
        iCoord.y = ffxMax(iCoord.y, 0);
    }
    else if (iPxOffset.y > 0)
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
    }

    return iCoord;
}
503
#if FFX_HALF
504
// FFX_MIN16_I2 overload of ClampCoord(): returns iPxSample + iPxOffset, clamped against 0 for a
// negative offset and against iTextureSize - 1 for a positive one; a zero offset is not clamped.
FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
{
    FFX_MIN16_I2 iCoord = iPxSample + iPxOffset;

    if (iPxOffset.x < FFX_MIN16_I(0))
    {
        iCoord.x = ffxMax(iCoord.x, FFX_MIN16_I(0));
    }
    else if (iPxOffset.x > FFX_MIN16_I(0))
    {
        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FFX_MIN16_I(1));
    }

    if (iPxOffset.y < FFX_MIN16_I(0))
    {
        iCoord.y = ffxMax(iCoord.y, FFX_MIN16_I(0));
    }
    else if (iPxOffset.y > FFX_MIN16_I(0))
    {
        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FFX_MIN16_I(1));
    }

    return iCoord;
}
513
#endif //FFX_HALF
514
515
516
// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which loads the 4x4 texel
// neighbourhood around iPxSample through LoadTexture. Field fColorXY receives the texel at offset
// (X - 1, Y - 1); every coordinate goes through ClampCoord() and every load is converted to TextureType.
#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        \
        /* y offset -1 */ \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
        \
        /* y offset +0 */ \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
        \
        /* y offset +1 */ \
        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
        \
        /* y offset +2 */ \
        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
        \
        return Samples; \
    }
543
544
// Declares a 4x4 fetch function `Name` returning FetchedBicubicSamples (FfxFloat32x4 texels, FfxInt32x2 addresses).
#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Declares a 4x4 fetch function `Name` returning FetchedBicubicSamplesMin16 (FFX_MIN16_F4 texels, FfxInt32x2 addresses).
#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
549
550
// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which loads the 2x2 texel
// neighbourhood starting at iPxSample through LoadTexture. Field fColorXY receives the texel at
// offset (+X, +Y); every coordinate goes through ClampCoord() and every load is converted to TextureType.
#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
    { \
        SampleType Samples; \
        /* y offset +0 */ \
        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
        /* y offset +1 */ \
        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
        return Samples; \
    }
560
561
// Declares a 2x2 fetch function `Name` returning FetchedBilinearSamples (FfxFloat32x4 texels, FfxInt32x2 addresses).
#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)

// Declares a 2x2 fetch function `Name` returning FetchedBilinearSamplesMin16 (FFX_MIN16_F4 texels, FfxInt32x2 addresses).
#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
566
567
// BE CAREFUL: there are some precision issues; a sample position such as (3253, 125) commonly
// ends up as (3252.9989778, 125.001102), so iPxSample can "jitter".
569
// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// The generated function converts the UV to texel space (texel centres at +0.5), clamps it,
// splits it into a base texel (floor) and a fractional part, fetches the neighbourhood with
// FetchSamples(iPxSample, iTextureSize) and filters it with InterpolateSamples(samples, fPxFrac).
//
// The float clamp allows fPxSample == iTextureSize, which would make the base texel one past the
// last valid index. ClampCoord() assumes the base texel is in range and does not clamp zero
// offsets, so the base texel is additionally limited to iTextureSize - 1 before fetching.
// For UVs inside [0, 1] this extra clamp never changes the result.
#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
        /* Keep the base texel inside the texture (see ClampCoord precondition) */ \
        iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
        iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }
582
583
// Declares `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
// The generated function converts the UV to texel space (texel centres at +0.5), clamps it,
// splits it into a base texel (floor) and a fractional part (converted to FFX_MIN16_F2), fetches
// the neighbourhood with FetchSamples(iPxSample, iTextureSize) and filters it with
// InterpolateSamples(samples, fPxFrac).
//
// The float clamp allows fPxSample == iTextureSize, which would make the base texel one past the
// last valid index. ClampCoord() assumes the base texel is in range and does not clamp zero
// offsets, so the base texel is additionally limited to iTextureSize - 1 before fetching.
// For UVs inside [0, 1] this extra clamp never changes the result.
#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
    { \
        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
        /* Clamp base coords */ \
        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
        /* */ \
        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \
        /* Keep the base texel inside the texture (see ClampCoord precondition) */ \
        iPxSample.x = ffxMin(iPxSample.x, iTextureSize.x - 1); \
        iPxSample.y = ffxMin(iPxSample.y, iTextureSize.y - 1); \
        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
        return fColorXY; \
    }
596
597
// Token pasting helpers: the extra indirection lets macro arguments expand before being pasted.
#define FFX_FSR2_CONCAT_ID(x, y) x ## y
#define FFX_FSR2_CONCAT(x, y)    FFX_FSR2_CONCAT_ID(x, y)

// Sampler lookup table: index 0, 1 or 2 selects the filter function name.
#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx

// Expands to the filter function name registered for index x (after x itself is expanded).
#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
604
605
#endif //!defined( FFX_FSR2_SAMPLE_H )
606
607