CoCalc -- ffx_core

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
⁹⁹¹⁷ views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
/// A define for abstracting shared memory between shading languages.
/// In GLSL this expands to the <c><i>shared</i></c> storage qualifier.
///
/// @ingroup GPU
#define FFX_GROUPSHARED shared

/// A define for abstracting compute memory barriers between shading languages.
/// In GLSL this expands to a call to <c><i>barrier()</i></c>.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER() barrier()

/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
/// Expands to nothing in GLSL.
///
/// @ingroup GPU
#define FFX_STATIC

/// A define for abstracting loop unrolling between shading languages.
/// Expands to nothing in GLSL.
///
/// @ingroup GPU
#define FFX_UNROLL
41

42
/// A define for abstracting a 'greater than' comparison operator between two types.
/// Expands to the GLSL <c><i>greaterThan</i></c> built-in.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) greaterThan(x, y)

/// A define for abstracting a 'greater than or equal' comparison operator between two types.
/// Expands to the GLSL <c><i>greaterThanEqual</i></c> built-in.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y)

/// A define for abstracting a 'less than' comparison operator between two types.
/// Expands to the GLSL <c><i>lessThan</i></c> built-in.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) lessThan(x, y)

/// A define for abstracting a 'less than or equal' comparison operator between two types.
/// Expands to the GLSL <c><i>lessThanEqual</i></c> built-in.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y)

/// A define for abstracting an 'equal' comparison operator between two types.
/// Expands to the GLSL <c><i>equal</i></c> built-in.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) equal(x, y)

/// A define for abstracting a 'not equal' comparison operator between two types.
/// Expands to the GLSL <c><i>notEqual</i></c> built-in.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) notEqual(x, y)
71

72
/// Convert a value to a scalar 32-bit float (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(x)   FfxFloat32(x)

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x))

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x))

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x))

/// Convert a value to a scalar 32-bit unsigned integer (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(x)   FfxUInt32(x)

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x))

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x))

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x))

/// Convert a value to a scalar 32-bit signed integer (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(x)   FfxInt32(x)

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x))

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x))

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x))
131

132
// NOTE: the FFX_MIN16_* names used below are not defined in this part of the
// file; they are expected to be provided elsewhere before these macros are used.

/// Convert a value to a scalar half-precision float (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(x)   FFX_MIN16_F(x)

/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x))

/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x))

/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x))

/// Convert a value to a scalar half-precision unsigned integer (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(x)   FFX_MIN16_U(x)

/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x))

/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x))

/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x))

/// Convert a value to a scalar half-precision signed integer (the 1-dimensional member of the broadcast family).
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(x)   FFX_MIN16_I(x)

/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x))

/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x))

/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x))
191

192
// Extension requirements. Define FFX_SKIP_EXT to suppress every #extension
// directive in this block.
#if !defined(FFX_SKIP_EXT)
// 16-bit storage and explicit arithmetic types; requested when FFX_HALF
// evaluates to a non-zero value.
#if FFX_HALF
    #extension GL_EXT_shader_16bit_storage : require
    #extension GL_EXT_shader_explicit_arithmetic_types : require
#endif // FFX_HALF

// 64-bit integer and 64-bit atomic support; requested when FFX_LONG is defined.
#if defined(FFX_LONG)
    #extension GL_ARB_gpu_shader_int64 : require
    #extension GL_NV_shader_atomic_int64 : require
#endif // #if defined(FFX_LONG)

// Subgroup operations; requested when FFX_WAVE is defined.
#if defined(FFX_WAVE)
    #extension GL_KHR_shader_subgroup_arithmetic : require
    #extension GL_KHR_shader_subgroup_ballot : require
    #extension GL_KHR_shader_subgroup_quad : require
    #extension GL_KHR_shader_subgroup_shuffle : require
#endif // #if defined(FFX_WAVE)
#endif // #if !defined(FFX_SKIP_EXT)
210

211
// Forward declarations of the ffxSqrt overloads (scalar and 2/3/4-component
// vectors). Their definitions are not in this part of the file.
FfxFloat32   ffxSqrt(FfxFloat32 x);
FfxFloat32x2 ffxSqrt(FfxFloat32x2 x);
FfxFloat32x3 ffxSqrt(FfxFloat32x3 x);
FfxFloat32x4 ffxSqrt(FfxFloat32x4 x);
216

217
/// Interprets the bit pattern of x as a floating-point number.
///
/// Implemented by calling the GLSL <c><i>uintBitsToFloat</i></c> intrinsic function.
///
/// @param [in] x                   The unsigned integer whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup GLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
    return uintBitsToFloat(x);
}
229

230
/// Interprets the bit pattern of each component of x as a floating-point number.
///
/// Implemented by calling the GLSL <c><i>uintBitsToFloat</i></c> intrinsic function.
///
/// @param [in] x                   The 2-component unsigned integer vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 2-component floating-point vector.
///
/// @ingroup GLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
    return uintBitsToFloat(x);
}
242

243
/// Interprets the bit pattern of each component of x as a floating-point number.
///
/// Implemented by calling the GLSL <c><i>uintBitsToFloat</i></c> intrinsic function.
///
/// @param [in] x                   The 3-component unsigned integer vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 3-component floating-point vector.
///
/// @ingroup GLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
    return uintBitsToFloat(x);
}
255

256
/// Interprets the bit pattern of each component of x as a floating-point number.
///
/// Implemented by calling the GLSL <c><i>uintBitsToFloat</i></c> intrinsic function.
///
/// @param [in] x                   The 4-component unsigned integer vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 4-component floating-point vector.
///
/// @ingroup GLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
    return uintBitsToFloat(x);
}
268

269
/// Interprets the bit pattern of x as an unsigned integer.
///
/// Implemented by calling the GLSL <c><i>floatBitsToUint</i></c> intrinsic function.
///
/// @param [in] x                   The floating-point value whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup GLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    return floatBitsToUint(x);
}
281

282
/// Interprets the bit pattern of each component of x as an unsigned integer.
///
/// Implemented by calling the GLSL <c><i>floatBitsToUint</i></c> intrinsic function.
///
/// @param [in] x                   The 2-component floating-point vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 2-component unsigned integer vector.
///
/// @ingroup GLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
    return floatBitsToUint(x);
}
294

295
/// Interprets the bit pattern of each component of x as an unsigned integer.
///
/// Implemented by calling the GLSL <c><i>floatBitsToUint</i></c> intrinsic function.
///
/// @param [in] x                   The 3-component floating-point vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 3-component unsigned integer vector.
///
/// @ingroup GLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
    return floatBitsToUint(x);
}
307

308
/// Interprets the bit pattern of each component of x as an unsigned integer.
///
/// Implemented by calling the GLSL <c><i>floatBitsToUint</i></c> intrinsic function.
///
/// @param [in] x                   The 4-component floating-point vector whose bits are reinterpreted.
///
/// @returns
/// The input interpreted as a 4-component unsigned integer vector.
///
/// @ingroup GLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
    return floatBitsToUint(x);
}
320

321
/// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent.
///
/// Implemented by calling the GLSL <c><i>packHalf2x16</i></c> intrinsic function with
/// <c><i>value</i></c> as the first component and 0.0 as the second, so the 16bit
/// pattern of <c><i>value</i></c> occupies the low 16 bits of the result and the
/// high 16 bits hold the encoding of 0.0.
///
/// @param [in] value               The value to convert.
///
/// @returns
/// The nearest 16bit equivalent of <c><i>value</i></c>, stored in the low 16 bits of a 32bit unsigned integer.
///
/// @ingroup GLSL
FfxUInt32 f32tof16(FfxFloat32 value)
{
    return packHalf2x16(FfxFloat32x2(value, 0.0));
}
333

334
/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
    return FfxFloat32x2(value, value);
}
346

347
/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
    return FfxFloat32x3(value, value, value);
}
359

360
/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
    return FfxFloat32x4(value, value, value, value);
}
372

373
/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
    return FfxInt32x2(value, value);
}
385

386
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
    return FfxInt32x3(value, value, value);
}
398

399
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
    return FfxInt32x4(value, value, value, value);
}
411

412
/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
    return FfxUInt32x2(value, value);
}
424

425
/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
    return FfxUInt32x3(value, value, value);
}
437

438
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup GLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
    return FfxUInt32x4(value, value, value, value);
}
450

451
/// Extract a range of bits from an unsigned integer, taking the offset and width as unsigned values.
///
/// Forwards to the GLSL <c><i>bitfieldExtract</i></c> intrinsic function after converting
/// <c><i>off</i></c> and <c><i>bits</i></c> to signed integers, which is the type the intrinsic expects.
///
/// @param [in] src                 The value to extract bits from.
/// @param [in] off                 The index of the first (least significant) bit to extract.
/// @param [in] bits                The number of bits to extract.
///
/// @returns
/// The result of the built-in <c><i>bitfieldExtract(src, off, bits)</i></c>.
///
/// @ingroup GLSL
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
    return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
}
458

459
/// Merge two values under a bit mask.
///
/// Every bit that is set in <c><i>mask</i></c> is taken from <c><i>ins</i></c>; every
/// bit that is clear in <c><i>mask</i></c> is taken from <c><i>src</i></c>.
///
/// @param [in] src                 The value supplying the bits where <c><i>mask</i></c> is 0.
/// @param [in] ins                 The value supplying the bits where <c><i>mask</i></c> is 1.
/// @param [in] mask                The bit mask selecting between <c><i>ins</i></c> and <c><i>src</i></c>.
///
/// @returns
/// <c><i>(ins & mask) | (src & ~mask)</i></c>.
///
/// @ingroup GLSL
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
    return (ins & mask) | (src & (~mask));
}
466

467
/// Insert the low <c><i>bits</i></c> bits of one value into another, starting at bit 0.
///
/// Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
/// Implemented by calling the 4-argument GLSL <c><i>bitfieldInsert</i></c> intrinsic function with an offset of 0.
///
/// @param [in] src                 The base value that receives the inserted bits.
/// @param [in] ins                 The value whose low bits are inserted.
/// @param [in] bits                The number of low bits to insert.
///
/// @returns
/// The result of the built-in <c><i>bitfieldInsert(src, ins, 0, bits)</i></c>.
///
/// @ingroup GLSL
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
    return bitfieldInsert(src, ins, 0, FfxInt32(bits));
}
475

476
/// Compute the linear interpolation between two values.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    return mix(x, y, t);
}
495

496
/// Compute the linear interpolation between two 2-dimensional vectors using a single scalar weight.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math, with the same <c><i>t</i></c> applied to every component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
    return mix(x, y, t);
}
515

516
/// Compute the component-wise linear interpolation between two 2-dimensional vectors.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math per component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
    return mix(x, y, t);
}
535

536
/// Compute the linear interpolation between two 3-dimensional vectors using a single scalar weight.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math, with the same <c><i>t</i></c> applied to every component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
    return mix(x, y, t);
}
555

556
/// Compute the component-wise linear interpolation between two 3-dimensional vectors.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math per component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
    return mix(x, y, t);
}
575

576
/// Compute the linear interpolation between two 4-dimensional vectors using a single scalar weight.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math, with the same <c><i>t</i></c> applied to every component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
    return mix(x, y, t);
}
595

596
/// Compute the component-wise linear interpolation between two 4-dimensional vectors.
///
/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
/// following math per component:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
    return mix(x, y, t);
}
615

616
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    return max(x, max(y, z));
}
633

634
/// Compute the component-wise maximum of three 2-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation
/// per component on GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    return max(x, max(y, z));
}
651

652
/// Compute the component-wise maximum of three 3-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation
/// per component on GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    return max(x, max(y, z));
}
669

670
/// Compute the component-wise maximum of three 4-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation
/// per component on GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    return max(x, max(y, z));
}
687

688
/// Compute the maximum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
/// GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    return max(x, max(y, z));
}
705

706
/// Compute the component-wise maximum of three 2-dimensional unsigned integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation
/// per component on GCN or RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    return max(x, max(y, z));
}
723

724
/// Compute the component-wise maximum of three 3-dimensional unsigned integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    return max(x, max(y, z));
}
741

742
/// Compute the component-wise maximum of three 4-dimensional unsigned integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    return max(x, max(y, z));
}
759

760
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
777

778
/// Compute the component-wise median of three 2-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
795

796
/// Compute the component-wise median of three 3-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
813

814
/// Compute the component-wise median of three 4-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
831

832
/// Compute the median of three signed integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
849

850
/// Compute the component-wise median of three 2-dimensional signed integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
867

868
/// Compute the component-wise median of three 3-dimensional signed integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
885

886
/// Compute the component-wise median of three 4-dimensional signed integer vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation
/// per component on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
    // The median is the larger of min(x, y) and min(max(x, y), z).
    return max(min(x, y), min(max(x, y), z));
}
903

904

905
/// Find the smallest of three values.
///
/// NOTE: This function is expected to map to a single <c><i>V_MIN3_F32</i></c> operation on
/// GCN and RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    FfxFloat32 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
922

923
/// Find the smallest of three values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to the <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    FfxFloat32x2 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
940

941
/// Find the smallest of three values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to the <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    FfxFloat32x3 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
958

959
/// Find the smallest of three values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to the <c><i>V_MIN3_F32</i></c> operation on
/// GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    FfxFloat32x4 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
976

977
/// Find the smallest of three unsigned integer values.
///
/// NOTE: This function is expected to map to a single three-operand minimum operation on
/// GCN/RDNA hardware (presumably <c><i>V_MIN3_U32</i></c> for unsigned operands -- TODO confirm).
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    FfxUInt32 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
994

995
/// Find the smallest of three unsigned integer values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to a three-operand minimum operation on
/// GCN/RDNA hardware (presumably <c><i>V_MIN3_U32</i></c> for unsigned operands -- TODO confirm).
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    FfxUInt32x2 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
1012

1013
/// Find the smallest of three unsigned integer values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to a three-operand minimum operation on
/// GCN/RDNA hardware (presumably <c><i>V_MIN3_U32</i></c> for unsigned operands -- TODO confirm).
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    FfxUInt32x3 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
1030

1031
/// Find the smallest of three unsigned integer values, evaluated independently for each component.
///
/// NOTE: This function is expected to map to a three-operand minimum operation on
/// GCN/RDNA hardware (presumably <c><i>V_MIN3_U32</i></c> for unsigned operands -- TODO confirm).
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup GLSL
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    FfxUInt32x4 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
1048

1049
/// Divide one by a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rcp</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 rcp(FfxFloat32 x)
{
    FfxFloat32 one = FfxFloat32(1.0);
    return one / x;
}
1063

1064
/// Divide one by each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rcp</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 rcp(FfxFloat32x2 x)
{
    FfxFloat32x2 ones = ffxBroadcast2(1.0);
    return ones / x;
}
1078

1079
/// Divide one by each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rcp</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 rcp(FfxFloat32x3 x)
{
    FfxFloat32x3 ones = ffxBroadcast3(1.0);
    return ones / x;
}
1093

1094
/// Divide one by each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rcp</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal for.
///
/// @returns
/// The reciprocal value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 rcp(FfxFloat32x4 x)
{
    FfxFloat32x4 ones = ffxBroadcast4(1.0);
    return ones / x;
}
1108

1109
/// Divide one by the square root of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rsqrt</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 rsqrt(FfxFloat32 x)
{
    FfxFloat32 one = FfxFloat32(1.0);
    return one / ffxSqrt(x);
}
1123

1124
/// Divide one by the square root of each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rsqrt</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 rsqrt(FfxFloat32x2 x)
{
    FfxFloat32x2 ones = ffxBroadcast2(1.0);
    return ones / ffxSqrt(x);
}
1138

1139
/// Divide one by the square root of each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rsqrt</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 rsqrt(FfxFloat32x3 x)
{
    FfxFloat32x3 ones = ffxBroadcast3(1.0);
    return ones / ffxSqrt(x);
}
1153

1154
/// Divide one by the square root of each component of a value.
///
/// NOTE: This function exists only in the GLSL path. HLSL code can use the <c><i>rsqrt</i></c> intrinsic directly.
///
/// @param [in] x               The value to compute the reciprocal square root for.
///
/// @returns
/// The reciprocal square root value of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 rsqrt(FfxFloat32x4 x)
{
    FfxFloat32x4 ones = ffxBroadcast4(1.0);
    return ones / ffxSqrt(x);
}
1168

1169
/// Limit a value to the [0..1] range.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
    FfxFloat32 lowerBound = FfxFloat32(0.0);
    FfxFloat32 upperBound = FfxFloat32(1.0);
    return clamp(x, lowerBound, upperBound);
}
1181

1182
/// Limit each component of a value to the [0..1] range.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
    FfxFloat32x2 lowerBound = ffxBroadcast2(0.0);
    FfxFloat32x2 upperBound = ffxBroadcast2(1.0);
    return clamp(x, lowerBound, upperBound);
}
1194

1195
/// Limit each component of a value to the [0..1] range.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
    FfxFloat32x3 lowerBound = ffxBroadcast3(0.0);
    FfxFloat32x3 upperBound = ffxBroadcast3(1.0);
    return clamp(x, lowerBound, upperBound);
}
1207

1208
/// Limit each component of a value to the [0..1] range.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
    FfxFloat32x4 lowerBound = ffxBroadcast4(0.0);
    FfxFloat32x4 upperBound = ffxBroadcast4(1.0);
    return clamp(x, lowerBound, upperBound);
}
1220

1221
/// Compute the fractional part of a decimal value.
///
/// This function forwards to the GLSL built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
///
/// NOTE: The result follows the <c><i>x - floor(x)</i></c> definition; the HLSL path documents its counterpart as
/// intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
    FfxFloat32 fractionalPart = fract(x);
    return fractionalPart;
}
1239

1240
/// Compute the fractional part of each component of a decimal value.
///
/// This function forwards to the GLSL built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
///
/// NOTE: The result follows the <c><i>x - floor(x)</i></c> definition; the HLSL path documents its counterpart as
/// intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
    FfxFloat32x2 fractionalPart = fract(x);
    return fractionalPart;
}
1258

1259
/// Compute the fractional part of each component of a decimal value.
///
/// This function forwards to the GLSL built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
///
/// NOTE: The result follows the <c><i>x - floor(x)</i></c> definition; the HLSL path documents its counterpart as
/// intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
    FfxFloat32x3 fractionalPart = fract(x);
    return fractionalPart;
}
1277

1278
/// Compute the fractional part of each component of a decimal value.
///
/// This function forwards to the GLSL built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
///
/// NOTE: The result follows the <c><i>x - floor(x)</i></c> definition; the HLSL path documents its counterpart as
/// intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup GLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
    FfxFloat32x4 fractionalPart = fract(x);
    return fractionalPart;
}
1296

1297
// Right-shift an unsigned value as if it were signed: both operands are converted to
// FfxInt32 before the shift (GLSL extends the sign bit when right-shifting a signed
// value), and the result is converted back to FfxUInt32.
FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    FfxInt32 signedValue = FfxInt32(a);
    FfxInt32 shiftAmount = FfxInt32(b);
    return FfxUInt32(signedValue >> shiftAmount);
}
1301

1302
#if FFX_HALF
1303

1304
// Unpack one 32-bit word into two 16-bit floats using unpackFloat2x16; the argument is cast to FfxUInt32 first.
#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))
1305

1306
// Unpack two 32-bit words into four 16-bit floats: x.x supplies the first two
// components of the result and x.y supplies the last two.
FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
    FfxFloat16x2 fromX = unpackFloat2x16(x.x);
    FfxFloat16x2 fromY = unpackFloat2x16(x.y);
    return FfxFloat16x4(fromX, fromY);
}
1310
// Unpack two 32-bit words into four 16-bit floats; the argument is cast to FfxUInt32x2 first.
#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
1311
// Split one 32-bit word into two 16-bit unsigned integers using unpackUint2x16.
#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x))
1312
// Combine two 32-bit words with pack64, then split the result into four 16-bit unsigned integers using unpackUint4x16.
#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x)))
1313
//------------------------------------------------------------------------------------------------------------------------------
1314
// Pack two 16-bit floats into one 32-bit word using packFloat2x16; the argument is cast to FfxFloat16x2 first.
#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x))
1315
// Pack four 16-bit floats into two 32-bit words: x.xy goes into the first word
// and x.zw into the second.
FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x)
{
    FfxUInt32 packedXY = packFloat2x16(x.xy);
    FfxUInt32 packedZW = packFloat2x16(x.zw);
    return FfxUInt32x2(packedXY, packedZW);
}
1319
// Pack four 16-bit floats into two 32-bit words; the argument is cast to FfxFloat16x4 first.
#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x))
1320
// Pack two 16-bit unsigned integers into one 32-bit word using packUint2x16.
#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x))
1321
// Pack four 16-bit unsigned integers with packUint4x16, then split the result into two 32-bit words with unpack32.
#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x)))
1322
//==============================================================================================================================
1323
// Reinterpret 16-bit float values as 16-bit unsigned integers via halfBitsToUint16.
// Each macro first casts its argument to the float type of the matching width (scalar, x2, x3, x4).
#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x))
1324
#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x))
1325
#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x))
1326
#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x))
1327
//------------------------------------------------------------------------------------------------------------------------------
1328
// Reinterpret 16-bit unsigned integer values as 16-bit floats via uint16BitsToHalf.
// Each macro first casts its argument to the unsigned type of the matching width (scalar, x2, x3, x4).
#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x))
1329
#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x))
1330
#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x))
1331
#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x))
1332
//==============================================================================================================================
1333
// Replicate one 16-bit float into every component of a scalar, 2-, 3- or 4-wide value.
// A GLSL vector constructor given a single scalar fills all components with it.
FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a)
{
    return a;
}
FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a)
{
    return FfxFloat16x2(a);
}
FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a)
{
    return FfxFloat16x3(a);
}
FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a)
{
    return FfxFloat16x4(a);
}
1349
// Macro forms of the 16-bit float broadcast: the argument is cast to FfxFloat16 and handed
// to the target type's constructor as its only argument.
#define FFX_BROADCAST_FLOAT16(a)   FfxFloat16(a)
1350
#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a))
1351
#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a))
1352
#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a))
1353
//------------------------------------------------------------------------------------------------------------------------------
1354
// Replicate one 16-bit signed integer into every component of a scalar, 2-, 3- or 4-wide value.
// A GLSL vector constructor given a single scalar fills all components with it.
FfxInt16 ffxBroadcastInt16(FfxInt16 a)
{
    return a;
}
FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a)
{
    return FfxInt16x2(a);
}
FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a)
{
    return FfxInt16x3(a);
}
FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a)
{
    return FfxInt16x4(a);
}
1370
// Macro forms of the 16-bit signed integer broadcast: the argument is cast to FfxInt16 and
// handed to the target type's constructor as its only argument.
#define FFX_BROADCAST_INT16(a)   FfxInt16(a)
1371
#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a))
1372
#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a))
1373
#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a))
1374
//------------------------------------------------------------------------------------------------------------------------------
1375
// Replicate one 16-bit unsigned integer into every component of a scalar, 2-, 3- or 4-wide value.
// A GLSL vector constructor given a single scalar fills all components with it.
FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a)
{
    return a;
}
FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a)
{
    return FfxUInt16x2(a);
}
FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a)
{
    return FfxUInt16x3(a);
}
FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a)
{
    return FfxUInt16x4(a);
}
1391
// Macro forms of the 16-bit unsigned integer broadcast: the argument is cast to FfxUInt16 and
// handed to the target type's constructor as its only argument.
#define FFX_BROADCAST_UINT16(a)   FfxUInt16(a)
1392
#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a))
1393
#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a))
1394
#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a))
1395
//==============================================================================================================================
1396
// Absolute value of 16-bit unsigned inputs treated as signed: each overload converts
// its argument to the signed type, applies abs, and converts the result back to unsigned.
FfxUInt16 ffxAbsHalf(FfxUInt16 a)
{
    FfxInt16 signedValue = FfxInt16(a);
    return FfxUInt16(abs(signedValue));
}
FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a)
{
    FfxInt16x2 signedValue = FfxInt16x2(a);
    return FfxUInt16x2(abs(signedValue));
}
FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a)
{
    FfxInt16x3 signedValue = FfxInt16x3(a);
    return FfxUInt16x3(abs(signedValue));
}
FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a)
{
    FfxInt16x4 signedValue = FfxInt16x4(a);
    return FfxUInt16x4(abs(signedValue));
}
1412
//------------------------------------------------------------------------------------------------------------------------------
1413
// Limit x to the range [n, m] by forwarding to the GLSL built-in clamp.
// The vector overloads take per-component bounds.
FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m)
{
    FfxFloat16 limited = clamp(x, n, m);
    return limited;
}
FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m)
{
    FfxFloat16x2 limited = clamp(x, n, m);
    return limited;
}
FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m)
{
    FfxFloat16x3 limited = clamp(x, n, m);
    return limited;
}
FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m)
{
    FfxFloat16x4 limited = clamp(x, n, m);
    return limited;
}
1429
//------------------------------------------------------------------------------------------------------------------------------
1430
// 16-bit float overloads of ffxFract: each forwards to the GLSL built-in fract.
FfxFloat16 ffxFract(FfxFloat16 x)
{
    FfxFloat16 fractionalPart = fract(x);
    return fractionalPart;
}
FfxFloat16x2 ffxFract(FfxFloat16x2 x)
{
    FfxFloat16x2 fractionalPart = fract(x);
    return fractionalPart;
}
FfxFloat16x3 ffxFract(FfxFloat16x3 x)
{
    FfxFloat16x3 fractionalPart = fract(x);
    return fractionalPart;
}
FfxFloat16x4 ffxFract(FfxFloat16x4 x)
{
    FfxFloat16x4 fractionalPart = fract(x);
    return fractionalPart;
}
1446
//------------------------------------------------------------------------------------------------------------------------------
1447
// 16-bit float linear interpolation between x and y with weight a, forwarded to the
// GLSL built-in mix. Vector overloads exist for both a scalar weight shared by all
// components and a per-component weight vector.
FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a)
{
    FfxFloat16 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a)
{
    FfxFloat16x2 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a)
{
    FfxFloat16x2 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a)
{
    FfxFloat16x3 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a)
{
    FfxFloat16x3 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a)
{
    FfxFloat16x4 blended = mix(x, y, a);
    return blended;
}
FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
{
    FfxFloat16x4 blended = mix(x, y, a);
    return blended;
}
1475
//------------------------------------------------------------------------------------------------------------------------------
1476
// No packed version of ffxMed3.
1477
// Median of three 16-bit values (float and signed integer overloads, component-wise for
// vectors): x and y are ordered first, then z is limited to the range they span.
FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    FfxFloat16 lowerXY = min(x, y);
    FfxFloat16 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    FfxFloat16x2 lowerXY = min(x, y);
    FfxFloat16x2 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    FfxFloat16x3 lowerXY = min(x, y);
    FfxFloat16x3 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    FfxFloat16x4 lowerXY = min(x, y);
    FfxFloat16x4 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z)
{
    FfxInt16 lowerXY = min(x, y);
    FfxInt16 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z)
{
    FfxInt16x2 lowerXY = min(x, y);
    FfxInt16x2 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z)
{
    FfxInt16x3 lowerXY = min(x, y);
    FfxInt16x3 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z)
{
    FfxInt16x4 lowerXY = min(x, y);
    FfxInt16x4 upperXY = max(x, y);
    return max(lowerXY, min(upperXY, z));
}
1509
//------------------------------------------------------------------------------------------------------------------------------
1510
// No packed version of ffxMax3.
1511
// Largest of three 16-bit float values (component-wise for vectors), built from two
// nested calls to the GLSL built-in max.
FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    FfxFloat16 largestYZ = max(y, z);
    return max(x, largestYZ);
}
FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    FfxFloat16x2 largestYZ = max(y, z);
    return max(x, largestYZ);
}
FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    FfxFloat16x3 largestYZ = max(y, z);
    return max(x, largestYZ);
}
FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    FfxFloat16x4 largestYZ = max(y, z);
    return max(x, largestYZ);
}
1527
//------------------------------------------------------------------------------------------------------------------------------
1528
// No packed version of ffxMin3.
1529
// Smallest of three 16-bit float values (component-wise for vectors), built from two
// nested calls to the GLSL built-in min.
FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
{
    FfxFloat16 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
{
    FfxFloat16x2 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
{
    FfxFloat16x3 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
{
    FfxFloat16x4 smallestYZ = min(y, z);
    return min(x, smallestYZ);
}
1545
//------------------------------------------------------------------------------------------------------------------------------
1546
// Reciprocal of 16-bit float values: a broadcast 1.0 divided by x (component-wise for vectors).
FfxFloat16 ffxReciprocalHalf(FfxFloat16 x)
{
    FfxFloat16 one = FFX_BROADCAST_FLOAT16(1.0);
    return one / x;
}
FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x)
{
    FfxFloat16x2 ones = FFX_BROADCAST_FLOAT16X2(1.0);
    return ones / x;
}
FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x)
{
    FfxFloat16x3 ones = FFX_BROADCAST_FLOAT16X3(1.0);
    return ones / x;
}
FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x)
{
    FfxFloat16x4 ones = FFX_BROADCAST_FLOAT16X4(1.0);
    return ones / x;
}
1562
//------------------------------------------------------------------------------------------------------------------------------
1563
// Reciprocal square root of 16-bit float values: a broadcast 1.0 divided by sqrt(x)
// (component-wise for vectors).
FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x)
{
    FfxFloat16 one = FFX_BROADCAST_FLOAT16(1.0);
    return one / sqrt(x);
}
FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x)
{
    FfxFloat16x2 ones = FFX_BROADCAST_FLOAT16X2(1.0);
    return ones / sqrt(x);
}
FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x)
{
    FfxFloat16x3 ones = FFX_BROADCAST_FLOAT16X3(1.0);
    return ones / sqrt(x);
}
FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x)
{
    FfxFloat16x4 ones = FFX_BROADCAST_FLOAT16X4(1.0);
    return ones / sqrt(x);
}
1579
//------------------------------------------------------------------------------------------------------------------------------
1580
// 16-bit float overloads of ffxSaturate: clamp x (component-wise for vectors) to the
// [0..1] range using broadcast 0.0 and 1.0 bounds.
FfxFloat16 ffxSaturate(FfxFloat16 x)
{
    FfxFloat16 lowerBound = FFX_BROADCAST_FLOAT16(0.0);
    FfxFloat16 upperBound = FFX_BROADCAST_FLOAT16(1.0);
    return clamp(x, lowerBound, upperBound);
}
FfxFloat16x2 ffxSaturate(FfxFloat16x2 x)
{
    FfxFloat16x2 lowerBound = FFX_BROADCAST_FLOAT16X2(0.0);
    FfxFloat16x2 upperBound = FFX_BROADCAST_FLOAT16X2(1.0);
    return clamp(x, lowerBound, upperBound);
}
FfxFloat16x3 ffxSaturate(FfxFloat16x3 x)
{
    FfxFloat16x3 lowerBound = FFX_BROADCAST_FLOAT16X3(0.0);
    FfxFloat16x3 upperBound = FFX_BROADCAST_FLOAT16X3(1.0);
    return clamp(x, lowerBound, upperBound);
}
FfxFloat16x4 ffxSaturate(FfxFloat16x4 x)
{
    FfxFloat16x4 lowerBound = FFX_BROADCAST_FLOAT16X4(0.0);
    FfxFloat16x4 upperBound = FFX_BROADCAST_FLOAT16X4(1.0);
    return clamp(x, lowerBound, upperBound);
}
1596
//------------------------------------------------------------------------------------------------------------------------------
1597
// Right-shift 16-bit unsigned values as if they were signed: both operands are converted
// to the signed type before the shift (GLSL extends the sign bit when right-shifting a
// signed value), and the result is converted back to unsigned.
FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b)
{
    FfxInt16 signedValue = FfxInt16(a);
    FfxInt16 shiftAmount = FfxInt16(b);
    return FfxUInt16(signedValue >> shiftAmount);
}
FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b)
{
    FfxInt16x2 signedValue = FfxInt16x2(a);
    FfxInt16x2 shiftAmount = FfxInt16x2(b);
    return FfxUInt16x2(signedValue >> shiftAmount);
}
FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b)
{
    FfxInt16x3 signedValue = FfxInt16x3(a);
    FfxInt16x3 shiftAmount = FfxInt16x3(b);
    return FfxUInt16x3(signedValue >> shiftAmount);
}
FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b)
{
    FfxInt16x4 signedValue = FfxInt16x4(a);
    FfxInt16x4 shiftAmount = FfxInt16x4(b);
    return FfxUInt16x4(signedValue >> shiftAmount);
}
1613
#endif // FFX_HALF
1614

1615
#if defined(FFX_WAVE)
1616
// Where 'x' must be a compile time literal.
1617
// Float wrappers around subgroupShuffleXor: each returns the result of
// subgroupShuffleXor(v, x) for a 1- to 4-component 32-bit float value.
FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
    FfxFloat32 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
    FfxFloat32x2 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
    FfxFloat32x3 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
    FfxFloat32x4 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
1633
// Unsigned integer wrappers around subgroupShuffleXor: each returns the result of
// subgroupShuffleXor(v, x) for a 1- to 4-component 32-bit unsigned value.
FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
    FfxUInt32 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
{
    FfxUInt32x2 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
{
    FfxUInt32x3 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
{
    FfxUInt32x4 exchanged = subgroupShuffleXor(v, x);
    return exchanged;
}
1649

1650
//------------------------------------------------------------------------------------------------------------------------------
1651
#if FFX_HALF
1652
// 16-bit wrappers around subgroupShuffleXor. Each overload packs its 16-bit components
// into 32-bit words, shuffles the packed words, and unpacks the result back to 16-bit.
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
{
    FfxUInt32 packedBits = FFX_FLOAT16X2_TO_UINT32(v);
    FfxUInt32 exchanged = subgroupShuffleXor(packedBits, x);
    return FFX_UINT32_TO_FLOAT16X2(exchanged);
}
FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
{
    FfxUInt32x2 packedBits = FFX_FLOAT16X4_TO_UINT32X2(v);
    FfxUInt32x2 exchanged = subgroupShuffleXor(packedBits, x);
    return FFX_UINT32X2_TO_FLOAT16X4(exchanged);
}
FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
{
    FfxUInt32 packedBits = FFX_UINT16X2_TO_UINT32(v);
    FfxUInt32 exchanged = subgroupShuffleXor(packedBits, x);
    return FFX_UINT32_TO_UINT16X2(exchanged);
}
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
    FfxUInt32x2 packedBits = FFX_UINT16X4_TO_UINT32X2(v);
    FfxUInt32x2 exchanged = subgroupShuffleXor(packedBits, x);
    return FFX_UINT32X2_TO_UINT16X4(exchanged);
}
1668
#endif // FFX_HALF
1669
#endif // #if defined(FFX_WAVE)
1670

1671
Product

Resources

Company