CoCalc -- ffx_core

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
⁹⁸⁹⁹ views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21

22
/// Storage qualifier for thread-group shared memory.
///
/// Expands to the HLSL <c><i>groupshared</i></c> keyword.
///
/// @ingroup GPU
#define FFX_GROUPSHARED          groupshared

/// Name of the compute memory barrier used by portable shader code.
///
/// Expands to the identifier <c><i>GroupMemoryBarrierWithGroupSync</i></c> only; the call
/// parentheses are supplied at the point of use.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync

/// Markup that lets shared CPU/GPU code tag a function as static.
///
/// Expands to the <c><i>static</i></c> keyword.
///
/// @ingroup GPU
#define FFX_STATIC               static

/// Loop unrolling hint for portable shader code.
///
/// Expands to the HLSL <c><i>[unroll]</i></c> attribute.
///
/// @ingroup GPU
#define FFX_UNROLL               [unroll]
41

42
/// Portable 'greater than' comparison between two values of the same type.
///
/// Both arguments and the whole expansion are parenthesized so the macro behaves as a
/// single expression regardless of the operators surrounding or inside its arguments.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) ((x) > (y))

/// Portable 'greater than or equal' comparison between two values of the same type.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))

/// Portable 'less than' comparison between two values of the same type.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) ((x) < (y))

/// Portable 'less than or equal' comparison between two values of the same type.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))

/// Portable 'equal' comparison between two values of the same type.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) ((x) == (y))

/// Portable 'not equal' comparison between two values of the same type.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) ((x) != (y))
71

72
// NOTE(review): every macro in this group expands to the *scalar* constructor of its
// element type, whatever the vector width in its name; presumably the widening is left
// to HLSL's implicit scalar-to-vector conversion at the use site -- confirm.

/// Broadcast a scalar to a 1-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(value)   FfxFloat32(value)

/// Broadcast a scalar to a 2-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(value) FfxFloat32(value)

/// Broadcast a scalar to a 3-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(value) FfxFloat32(value)

/// Broadcast a scalar to a 4-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(value) FfxFloat32(value)

/// Broadcast a scalar to a 1-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(value)    FfxUInt32(value)

/// Broadcast a scalar to a 2-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(value)  FfxUInt32(value)

/// Broadcast a scalar to a 3-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(value)  FfxUInt32(value)

/// Broadcast a scalar to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(value)  FfxUInt32(value)

/// Broadcast a scalar to a 1-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(value)     FfxInt32(value)

/// Broadcast a scalar to a 2-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(value)   FfxInt32(value)

/// Broadcast a scalar to a 3-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(value)   FfxInt32(value)

/// Broadcast a scalar to a 4-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(value)   FfxInt32(value)
131

132
// NOTE(review): every macro in this group forwards to the scalar FFX_MIN16_F / FFX_MIN16_U /
// FFX_MIN16_I helper (defined outside this section), whatever the vector width in its name;
// presumably the widening is left to implicit conversion at the use site -- confirm.

/// Broadcast a scalar to a 1-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(value)   FFX_MIN16_F(value)

/// Broadcast a scalar to a 2-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(value) FFX_MIN16_F(value)

/// Broadcast a scalar to a 3-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(value) FFX_MIN16_F(value)

/// Broadcast a scalar to a 4-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(value) FFX_MIN16_F(value)

/// Broadcast a scalar to a 1-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(value)    FFX_MIN16_U(value)

/// Broadcast a scalar to a 2-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(value)  FFX_MIN16_U(value)

/// Broadcast a scalar to a 3-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(value)  FFX_MIN16_U(value)

/// Broadcast a scalar to a 4-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(value)  FFX_MIN16_U(value)

/// Broadcast a scalar to a 1-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(value)     FFX_MIN16_I(value)

/// Broadcast a scalar to a 2-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(value)   FFX_MIN16_I(value)

/// Broadcast a scalar to a 3-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(value)   FFX_MIN16_I(value)

/// Broadcast a scalar to a 4-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(value)   FFX_MIN16_I(value)
191

192
/// Convert two 32bit floats to half precision and pack them into one 32bit word.
///
/// Each component of <c><i>value</i></c> is passed through the HLSL <c><i>f32tof16</i></c>
/// intrinsic. The result for X is placed in the lower 16 bits of the return value and the
/// result for Y is shifted into the upper 16 bits.
///
/// @param [in] value               The 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A 32bit unsigned integer holding the two 16bit floating point values.
///
/// @ingroup HLSL
FfxUInt32 packHalf2x16(FfxFloat32x2 value)
{
    const FfxUInt32 lowerHalf = f32tof16(value.x);
    const FfxUInt32 upperHalf = f32tof16(value.y) << 16;

    return lowerHalf | upperHalf;
}
208

209
/// Replicate a floating point scalar into both components of a 2-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
    const FfxFloat32x2 replicated = FfxFloat32x2(value, value);
    return replicated;
}
221

222
/// Replicate a floating point scalar into all components of a 3-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
    const FfxFloat32x3 replicated = FfxFloat32x3(value, value, value);
    return replicated;
}
234

235
/// Replicate a floating point scalar into all components of a 4-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
    const FfxFloat32x4 replicated = FfxFloat32x4(value, value, value, value);
    return replicated;
}
247

248
/// Replicate a signed integer scalar into both components of a 2-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
    const FfxInt32x2 replicated = FfxInt32x2(value, value);
    return replicated;
}
260

261
/// Replicate a signed integer scalar into all components of a 3-dimensional vector.
///
/// The result is a signed vector (<c><i>FfxInt32x3</i></c>), matching the signed input, so
/// negative values keep their sign when the result is used in signed arithmetic or
/// converted to floating point.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
    return FfxInt32x3(value, value, value);
}
273

274
/// Replicate a signed integer scalar into all components of a 4-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
    const FfxInt32x4 replicated = FfxInt32x4(value, value, value, value);
    return replicated;
}
286

287
/// Replicate an unsigned integer scalar into both components of a 2-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
    const FfxUInt32x2 replicated = FfxUInt32x2(value, value);
    return replicated;
}
299

300
/// Replicate an unsigned integer scalar into all components of a 3-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
    const FfxUInt32x3 replicated = FfxUInt32x3(value, value, value);
    return replicated;
}
312

313
/// Replicate an unsigned integer scalar into all components of a 4-dimensional vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
    const FfxUInt32x4 replicated = FfxUInt32x4(value, value, value, value);
    return replicated;
}
325

326
/// Extract a run of bits from a 32bit unsigned integer.
///
/// Shifts <c><i>src</i></c> right by <c><i>off</i></c> and keeps the lowest <c><i>bits</i></c>
/// bits of the shifted value. The mask is built as <c><i>(1u << bits) - 1</i></c> with no
/// special handling for a <c><i>bits</i></c> value of 32 or more.
///
/// @param [in] src                 The value to extract bits from.
/// @param [in] off                 The index of the lowest bit to extract.
/// @param [in] bits                The number of bits to extract.
///
/// @returns
/// The extracted bits, placed in the low end of the result.
///
/// @ingroup HLSL
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
    const FfxUInt32 lowBitsMask = (1u << bits) - 1;
    const FfxUInt32 shifted     = src >> off;

    return shifted & lowBitsMask;
}
331

332
/// Merge two 32bit values bit by bit under control of a mask.
///
/// Every bit that is set in <c><i>mask</i></c> is taken from <c><i>ins</i></c>; every bit that
/// is clear in <c><i>mask</i></c> is taken from <c><i>src</i></c>.
///
/// @param [in] src                 The value supplying the bits where <c><i>mask</i></c> is 0.
/// @param [in] ins                 The value supplying the bits where <c><i>mask</i></c> is 1.
/// @param [in] mask                The per-bit selection mask.
///
/// @returns
/// The merged 32bit value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
    const FfxUInt32 insertedBits  = ins & mask;
    const FfxUInt32 preservedBits = src & (~mask);

    return insertedBits | preservedBits;
}
336

337
/// Replace the lowest <c><i>bits</i></c> bits of a 32bit value.
///
/// Builds the mask <c><i>(1u << bits) - 1</i></c>, then takes the masked bits from
/// <c><i>ins</i></c> and all remaining bits from <c><i>src</i></c>. There is no special
/// handling for a <c><i>bits</i></c> value of 32 or more.
///
/// @param [in] src                 The value supplying the bits above the low bit run.
/// @param [in] ins                 The value supplying the low <c><i>bits</i></c> bits.
/// @param [in] bits                The number of low bits to take from <c><i>ins</i></c>.
///
/// @returns
/// The merged 32bit value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
    const FfxUInt32 lowBitsMask   = (1u << bits) - 1;
    const FfxUInt32 insertedBits  = ins & lowBitsMask;
    const FfxUInt32 preservedBits = src & (~lowBitsMask);

    return insertedBits | preservedBits;
}
342

343
/// Reinterpret the bits of a floating point value as an unsigned integer.
///
/// Thin wrapper over the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    const FfxUInt32 rawBits = asuint(x);
    return rawBits;
}
355

356
/// Reinterpret the bits of a 2-dimensional floating point vector as unsigned integers.
///
/// Thin wrapper over the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 2-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
    const FfxUInt32x2 rawBits = asuint(x);
    return rawBits;
}
368

369
/// Reinterpret the bits of a 3-dimensional floating point vector as unsigned integers.
///
/// Thin wrapper over the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 3-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
    const FfxUInt32x3 rawBits = asuint(x);
    return rawBits;
}
381

382
/// Reinterpret the bits of a 4-dimensional floating point vector as unsigned integers.
///
/// Thin wrapper over the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 4-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
    const FfxUInt32x4 rawBits = asuint(x);
    return rawBits;
}
394

395
/// Reinterpret the bits of an unsigned integer as a floating point value.
///
/// Thin wrapper over the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
    const FfxFloat32 reinterpreted = asfloat(x);
    return reinterpreted;
}
407

408
/// Reinterpret the bits of a 2-dimensional unsigned integer vector as floating point values.
///
/// Thin wrapper over the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 2-dimensional floating point vector.
///
/// @ingroup HLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
    const FfxFloat32x2 reinterpreted = asfloat(x);
    return reinterpreted;
}
420

421
/// Reinterpret the bits of a 3-dimensional unsigned integer vector as floating point values.
///
/// Thin wrapper over the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 3-dimensional floating point vector.
///
/// @ingroup HLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
    const FfxFloat32x3 reinterpreted = asfloat(x);
    return reinterpreted;
}
433

434
/// Reinterpret the bits of a 4-dimensional unsigned integer vector as floating point values.
///
/// Thin wrapper over the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 4-dimensional floating point vector.
///
/// @ingroup HLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
    const FfxFloat32x4 reinterpreted = asfloat(x);
    return reinterpreted;
}
446

447
/// Compute the linear interpolation between two scalar values.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The weight selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    const FfxFloat32 blended = lerp(x, y, t);
    return blended;
}
466

467
/// Compute the linear interpolation between two 2-dimensional vectors using one scalar weight.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The scalar weight selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
    const FfxFloat32x2 blended = lerp(x, y, t);
    return blended;
}
486

487
/// Compute the linear interpolation between two 2-dimensional vectors using per-component weights.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component weights selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
    const FfxFloat32x2 blended = lerp(x, y, t);
    return blended;
}
506

507
/// Compute the linear interpolation between two 3-dimensional vectors using one scalar weight.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The scalar weight selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
    const FfxFloat32x3 blended = lerp(x, y, t);
    return blended;
}
526

527
/// Compute the linear interpolation between two 3-dimensional vectors using per-component weights.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component weights selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
    const FfxFloat32x3 blended = lerp(x, y, t);
    return blended;
}
546

547
/// Compute the linear interpolation between two 4-dimensional vectors using one scalar weight.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The scalar weight selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
    const FfxFloat32x4 blended = lerp(x, y, t);
    return blended;
}
566

567
/// Compute the linear interpolation between two 4-dimensional vectors using per-component weights.
///
/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements
/// the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x               The first value to lerp between.
/// @param [in] y               The second value to lerp between.
/// @param [in] t               The per-component weights selecting how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
    const FfxFloat32x4 blended = lerp(x, y, t);
    return blended;
}
586

587
/// Clamp a scalar to the [0..1] range.
///
/// Thin wrapper over the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
    const FfxFloat32 clamped = saturate(x);
    return clamped;
}
599

600
/// Clamp each component of a 2-dimensional vector to the [0..1] range.
///
/// Thin wrapper over the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
    const FfxFloat32x2 clamped = saturate(x);
    return clamped;
}
612

613
/// Clamp each component of a 3-dimensional vector to the [0..1] range.
///
/// Thin wrapper over the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
    const FfxFloat32x3 clamped = saturate(x);
    return clamped;
}
625

626
/// Clamp each component of a 4-dimensional vector to the [0..1] range.
///
/// Thin wrapper over the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x               The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
    const FfxFloat32x4 clamped = saturate(x);
    return clamped;
}
638

639
/// Compute the fractional part of a scalar value.
///
/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic
/// HLSL function. The subtraction is written out explicitly rather than calling the HLSL
/// <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
    const FfxFloat32 wholePart = floor(x);
    return x - wholePart;
}
657

658
/// Compute the fractional part of each component of a 2-dimensional vector.
///
/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic
/// HLSL function. The subtraction is written out explicitly rather than calling the HLSL
/// <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
    const FfxFloat32x2 wholePart = floor(x);
    return x - wholePart;
}
676

677
/// Compute the fractional part of each component of a 3-dimensional vector.
///
/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic
/// HLSL function. The subtraction is written out explicitly rather than calling the HLSL
/// <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
    const FfxFloat32x3 wholePart = floor(x);
    return x - wholePart;
}
695

696
/// Compute the fractional part of each component of a 4-dimensional vector.
///
/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic
/// HLSL function. The subtraction is written out explicitly rather than calling the HLSL
/// <c><i>frac</i></c> intrinsic function.
///
/// @param [in] x               The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
    const FfxFloat32x4 wholePart = floor(x);
    return x - wholePart;
}
714

715
/// Compute the maximum of three floating point scalars.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    const FfxFloat32 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
731

732
/// Compute the per-component maximum of three 2-dimensional floating point vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    const FfxFloat32x2 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
748

749
/// Compute the per-component maximum of three 3-dimensional floating point vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    const FfxFloat32x3 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
765

766
/// Compute the per-component maximum of three 4-dimensional floating point vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    const FfxFloat32x4 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
782

783
/// Compute the maximum of three unsigned integer scalars.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    const FfxUInt32 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
799

800
/// Compute the per-component maximum of three 2-dimensional unsigned integer vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    const FfxUInt32x2 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
816

817
/// Compute the per-component maximum of three 3-dimensional unsigned integer vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    const FfxUInt32x3 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
833

834
/// Compute the per-component maximum of three 4-dimensional unsigned integer vectors.
///
/// Evaluated as two nested calls to the HLSL <c><i>max</i></c> intrinsic:
/// <c><i>max(x, max(y, z))</i></c>.
///
/// @param [in] x               The first value to include in the max calculation.
/// @param [in] y               The second value to include in the max calculation.
/// @param [in] z               The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    const FfxUInt32x4 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
850

851
/// Compute the median of three floating point scalars.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> with <c><i>min</i></c>/<c><i>max</i></c>, then limits
/// <c><i>z</i></c> to the range they span: <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    const FfxFloat32 lowerOfXY  = min(x, y);
    const FfxFloat32 higherOfXY = max(x, y);

    return max(lowerOfXY, min(higherOfXY, z));
}
867

868
/// Compute the per-component median of three 2-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> with <c><i>min</i></c>/<c><i>max</i></c>, then limits
/// <c><i>z</i></c> to the range they span: <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    const FfxFloat32x2 lowerOfXY  = min(x, y);
    const FfxFloat32x2 higherOfXY = max(x, y);

    return max(lowerOfXY, min(higherOfXY, z));
}
884

885
/// Compute the per-component median of three 3-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> with <c><i>min</i></c>/<c><i>max</i></c>, then limits
/// <c><i>z</i></c> to the range they span: <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    const FfxFloat32x3 lowerOfXY  = min(x, y);
    const FfxFloat32x3 higherOfXY = max(x, y);

    return max(lowerOfXY, min(higherOfXY, z));
}
901

902
/// Compute the per-component median of three 4-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> with <c><i>min</i></c>/<c><i>max</i></c>, then limits
/// <c><i>z</i></c> to the range they span: <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    const FfxFloat32x4 lowerOfXY  = min(x, y);
    const FfxFloat32x4 higherOfXY = max(x, y);

    return max(lowerOfXY, min(higherOfXY, z));
}
918

919
/// Compute the median of three signed integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
    const FfxInt32 lowerOfXY  = min(x, y);
    const FfxInt32 higherOfXY = max(x, y);

    // Limiting z to the range spanned by x and y yields the middle value.
    return max(lowerOfXY, min(higherOfXY, z));
}
937

938
/// Compute the component-wise median of three 2-component signed integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MED3_I32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
    const FfxInt32x2 lowerOfXY  = min(x, y);
    const FfxInt32x2 higherOfXY = max(x, y);

    // Limiting z to the range spanned by x and y yields the middle value.
    return max(lowerOfXY, min(higherOfXY, z));
}
956

957
/// Compute the component-wise median of three 3-component signed integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MED3_I32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
    const FfxInt32x3 lowerOfXY  = min(x, y);
    const FfxInt32x3 higherOfXY = max(x, y);

    // Limiting z to the range spanned by x and y yields the middle value.
    return max(lowerOfXY, min(higherOfXY, z));
}
973

974
/// Compute the component-wise median of three 4-component signed integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MED3_I32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the median calculation.
/// @param [in] y               The second value to include in the median calculation.
/// @param [in] z               The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
    const FfxInt32x4 lowerOfXY  = min(x, y);
    const FfxInt32x4 higherOfXY = max(x, y);

    // Limiting z to the range spanned by x and y yields the middle value.
    return max(lowerOfXY, min(higherOfXY, z));
}
990

991
/// Compute the minimum of three floating-point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    const FfxFloat32 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1007

1008
/// Compute the component-wise minimum of three 2-component floating-point values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_F32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    const FfxFloat32x2 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1024

1025
/// Compute the component-wise minimum of three 3-component floating-point values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_F32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    const FfxFloat32x3 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1041

1042
/// Compute the component-wise minimum of three 4-component floating-point values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_F32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    const FfxFloat32x4 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1058

1059
/// Compute the minimum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    const FfxUInt32 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1075

1076
/// Compute the component-wise minimum of three 2-component unsigned integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_U32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    const FfxUInt32x2 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1092

1093
/// Compute the component-wise minimum of three 3-component unsigned integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_U32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    const FfxUInt32x3 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1109

1110
/// Compute the component-wise minimum of three 4-component unsigned integer values.
///
/// NOTE: This is intended to compile down to <c><i>V_MIN3_U32</i></c> operations on GCN/RDNA hardware.
///
/// @param [in] x               The first value to include in the min calculation.
/// @param [in] y               The second value to include in the min calculation.
/// @param [in] z               The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    const FfxUInt32x4 smallerOfYZ = min(y, z);
    return min(x, smallerOfYZ);
}
1126

1127

1128
/// Shift <c><i>a</i></c> right by <c><i>b</i></c> bits, performing the shift on signed operands.
///
/// Both inputs are cast to <c><i>FfxInt32</i></c> before shifting and the result is cast back to
/// <c><i>FfxUInt32</i></c>.
///
/// @param [in] a               The value to shift.
/// @param [in] b               The number of bit positions to shift by.
///
/// @returns
/// The shifted value, as an unsigned integer.
FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    const FfxInt32 signedValue = FfxInt32(a);
    const FfxInt32 shiftCount  = FfxInt32(b);
    return FfxUInt32(signedValue >> shiftCount);
}
1132

1133
//==============================================================================================================================
1134
//                                                          HLSL HALF
1135
//==============================================================================================================================
1136
#if FFX_HALF
1137

1138
//==============================================================================================================================
1139
// Manual unpacking is needed to get optimal execution (do not use packed types in buffers directly).
// The unpack must follow this pattern: https://gpuopen.com/first-steps-implementing-fp16/
//
// Splits a 32-bit word into its low and high 16-bit halves, converts both with f16tof32 and returns
// them as an FFX_MIN16_F2 (.x from the low half, .y from the high half).
FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
{
	// Keep the expression in this exact form; see the pattern note above.
	FfxFloat32x2 unpacked = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
	return FFX_MIN16_F2(unpacked);
}
1146
// Unpacks two 32-bit words into four 16-bit floats: x.x supplies .xy and x.y supplies .zw.
FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
	const FFX_MIN16_F2 firstPair  = ffxUint32ToFloat16x2(x.x);
	const FFX_MIN16_F2 secondPair = ffxUint32ToFloat16x2(x.y);
	return FFX_MIN16_F4(firstPair, secondPair);
}
1150
// Splits a 32-bit word into two 16-bit unsigned values: .x is the low half, .y is the high half.
FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
{
	const FfxUInt32 lowHalf  = x & 0xFFFF;
	const FfxUInt32 highHalf = x >> 16;
	return FFX_MIN16_U2(FfxUInt32x2(lowHalf, highHalf));
}
1155
// Unpacks two 32-bit words into four 16-bit unsigned values: x.x supplies .xy and x.y supplies .zw.
FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
{
	const FFX_MIN16_U2 firstPair  = ffxUint32ToUint16x2(x.x);
	const FFX_MIN16_U2 secondPair = ffxUint32ToUint16x2(x.y);
	return FFX_MIN16_U4(firstPair, secondPair);
}
1159
// Unpack macros: cast the argument to the expected 32-bit type, then call the matching unpack helper.
#define FFX_UINT32_TO_FLOAT16X2(packed) ffxUint32ToFloat16x2(FfxUInt32(packed))
#define FFX_UINT32X2_TO_FLOAT16X4(packed) ffxUint32x2ToFloat16x4(FfxUInt32x2(packed))
#define FFX_UINT32_TO_UINT16X2(packed) ffxUint32ToUint16x2(FfxUInt32(packed))
#define FFX_UINT32X2_TO_UINT16X4(packed) ffxUint32x2ToUint16x4(FfxUInt32x2(packed))
1163
//------------------------------------------------------------------------------------------------------------------------------
1164
// Packs two 16-bit floats into one 32-bit word: x.x goes to the low half, x.y to the high half.
FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
{
	const FfxUInt32 lowBits  = f32tof16(x.x);
	const FfxUInt32 highBits = f32tof16(x.y) << 16;
	return lowBits + highBits;
}
1168
// Packs four 16-bit floats into two 32-bit words: x.xy fills the first word, x.zw the second.
FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
{
	const FfxUInt32 firstWord  = FFX_MIN16_F2ToUint32(x.xy);
	const FfxUInt32 secondWord = FFX_MIN16_F2ToUint32(x.zw);
	return FfxUInt32x2(firstWord, secondWord);
}
1172
// Packs two 16-bit unsigned values into one 32-bit word: x.x goes to the low half, x.y to the high half.
FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)
{
	const FfxUInt32 lowBits  = FfxUInt32(x.x);
	const FfxUInt32 highBits = FfxUInt32(x.y) << 16;
	return lowBits + highBits;
}
1176
// Packs four 16-bit unsigned values into two 32-bit words: x.xy fills the first word, x.zw the second.
FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
{
	const FfxUInt32 firstWord  = FFX_MIN16_U2ToUint32(x.xy);
	const FfxUInt32 secondWord = FFX_MIN16_U2ToUint32(x.zw);
	return FfxUInt32x2(firstWord, secondWord);
}
1180
// Pack macros: cast the argument to the expected 16-bit vector type, then call the matching pack helper.
#define FFX_FLOAT16X2_TO_UINT32(unpacked) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(unpacked))
#define FFX_FLOAT16X4_TO_UINT32X2(unpacked) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(unpacked))
#define FFX_UINT16X2_TO_UINT32(unpacked) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(unpacked))
#define FFX_UINT16X4_TO_UINT32X2(unpacked) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(unpacked))
1184

1185
// Conversions to 16-bit unsigned integers.
// Without FFX_HLSL_6_2, or when FFX_NO_16_BIT_CAST is defined, each component goes through f32tof16;
// otherwise the asuint16 cast is used directly.
#if !defined(FFX_HLSL_6_2) || defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_UINT16(v) FFX_MIN16_U(f32tof16(FfxFloat32(v)))
#define FFX_TO_UINT16X2(v) FFX_MIN16_U2(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y))
#define FFX_TO_UINT16X3(v) FFX_MIN16_U3(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y), FFX_TO_UINT16((v).z))
#define FFX_TO_UINT16X4(v) FFX_MIN16_U4(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y), FFX_TO_UINT16((v).z), FFX_TO_UINT16((v).w))
#else
#define FFX_TO_UINT16(v) asuint16(v)
#define FFX_TO_UINT16X2(v) asuint16(v)
#define FFX_TO_UINT16X3(v) asuint16(v)
#define FFX_TO_UINT16X4(v) asuint16(v)
#endif // #if !defined(FFX_HLSL_6_2) || defined(FFX_NO_16_BIT_CAST)
1196

1197
// Conversions to 16-bit floats.
// Without FFX_HLSL_6_2, or when FFX_NO_16_BIT_CAST is defined, each component goes through f16tof32;
// otherwise the asfloat16 cast is used directly.
#if !defined(FFX_HLSL_6_2) || defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_FLOAT16(v) FFX_MIN16_F(f16tof32(FfxUInt32(v)))
#define FFX_TO_FLOAT16X2(v) FFX_MIN16_F2(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y))
#define FFX_TO_FLOAT16X3(v) FFX_MIN16_F3(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y), FFX_TO_FLOAT16((v).z))
#define FFX_TO_FLOAT16X4(v) FFX_MIN16_F4(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y), FFX_TO_FLOAT16((v).z), FFX_TO_FLOAT16((v).w))
#else
#define FFX_TO_FLOAT16(v) asfloat16(v)
#define FFX_TO_FLOAT16X2(v) asfloat16(v)
#define FFX_TO_FLOAT16X3(v) asfloat16(v)
#define FFX_TO_FLOAT16X4(v) asfloat16(v)
#endif // #if !defined(FFX_HLSL_6_2) || defined(FFX_NO_16_BIT_CAST)
1208

1209
//==============================================================================================================================
1210
// Broadcast macros for 16-bit types. Every width expands to the scalar constructor;
// presumably the widening to a vector is left to implicit conversion at the use site.
#define FFX_BROADCAST_FLOAT16(value)   FFX_MIN16_F(value)
#define FFX_BROADCAST_FLOAT16X2(value) FFX_MIN16_F(value)
#define FFX_BROADCAST_FLOAT16X3(value) FFX_MIN16_F(value)
#define FFX_BROADCAST_FLOAT16X4(value) FFX_MIN16_F(value)

//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_INT16(value)   FFX_MIN16_I(value)
#define FFX_BROADCAST_INT16X2(value) FFX_MIN16_I(value)
#define FFX_BROADCAST_INT16X3(value) FFX_MIN16_I(value)
#define FFX_BROADCAST_INT16X4(value) FFX_MIN16_I(value)

//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_UINT16(value)   FFX_MIN16_U(value)
#define FFX_BROADCAST_UINT16X2(value) FFX_MIN16_U(value)
#define FFX_BROADCAST_UINT16X3(value) FFX_MIN16_U(value)
#define FFX_BROADCAST_UINT16X4(value) FFX_MIN16_U(value)
1226

1227
//==============================================================================================================================
1228
// Absolute value of a 16-bit unsigned input after casting it to the signed type.
// The result is cast back to the unsigned type. Vector overloads work per component.
FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
{
	const FFX_MIN16_I signedValue = FFX_MIN16_I(a);
	return FFX_MIN16_U(abs(signedValue));
}
FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
{
	const FFX_MIN16_I2 signedValue = FFX_MIN16_I2(a);
	return FFX_MIN16_U2(abs(signedValue));
}
FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
{
	const FFX_MIN16_I3 signedValue = FFX_MIN16_I3(a);
	return FFX_MIN16_U3(abs(signedValue));
}
FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
{
	const FFX_MIN16_I4 signedValue = FFX_MIN16_I4(a);
	return FFX_MIN16_U4(abs(signedValue));
}
1244
//------------------------------------------------------------------------------------------------------------------------------
1245
// Clamps x to the range [n, m]: the upper bound m is applied first, then the lower bound n.
// Vector overloads work per component.
FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
{
	const FFX_MIN16_F upperBounded = min(x, m);
	return max(n, upperBounded);
}
FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
{
	const FFX_MIN16_F2 upperBounded = min(x, m);
	return max(n, upperBounded);
}
FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
{
	const FFX_MIN16_F3 upperBounded = min(x, m);
	return max(n, upperBounded);
}
FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
{
	const FFX_MIN16_F4 upperBounded = min(x, m);
	return max(n, upperBounded);
}
1261
//------------------------------------------------------------------------------------------------------------------------------
1262
// V_FRACT_F16 (note DX frac() is different).
// Fractional part computed as x minus floor(x). Vector overloads work per component.
FFX_MIN16_F ffxFract(FFX_MIN16_F x)
{
	const FFX_MIN16_F wholePart = floor(x);
	return x - wholePart;
}
FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
{
	const FFX_MIN16_F2 wholePart = floor(x);
	return x - wholePart;
}
FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
{
	const FFX_MIN16_F3 wholePart = floor(x);
	return x - wholePart;
}
FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
{
	const FFX_MIN16_F4 wholePart = floor(x);
	return x - wholePart;
}
1279
//------------------------------------------------------------------------------------------------------------------------------
1280
// 16-bit wrappers around the lerp intrinsic, blending from x to y by the factor a.
// Vector overloads accept either a scalar factor or a factor of matching width.
FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
{
	const FFX_MIN16_F blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
{
	const FFX_MIN16_F2 blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
{
	const FFX_MIN16_F2 blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
{
	const FFX_MIN16_F3 blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
{
	const FFX_MIN16_F3 blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
{
	const FFX_MIN16_F4 blended = lerp(x, y, a);
	return blended;
}
FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
{
	const FFX_MIN16_F4 blended = lerp(x, y, a);
	return blended;
}
1308
//------------------------------------------------------------------------------------------------------------------------------
1309
// Maximum of three 16-bit float values. Vector overloads work per component.
FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
	const FFX_MIN16_F largerOfYZ = max(y, z);
	return max(x, largerOfYZ);
}
FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
	const FFX_MIN16_F2 largerOfYZ = max(y, z);
	return max(x, largerOfYZ);
}
FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
	const FFX_MIN16_F3 largerOfYZ = max(y, z);
	return max(x, largerOfYZ);
}
FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
	const FFX_MIN16_F4 largerOfYZ = max(y, z);
	return max(x, largerOfYZ);
}
1325
//------------------------------------------------------------------------------------------------------------------------------
1326
// Minimum of three 16-bit float values. Vector overloads work per component.
FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
	const FFX_MIN16_F smallerOfYZ = min(y, z);
	return min(x, smallerOfYZ);
}
FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
	const FFX_MIN16_F2 smallerOfYZ = min(y, z);
	return min(x, smallerOfYZ);
}
FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
	const FFX_MIN16_F3 smallerOfYZ = min(y, z);
	return min(x, smallerOfYZ);
}
FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
	const FFX_MIN16_F4 smallerOfYZ = min(y, z);
	return min(x, smallerOfYZ);
}
1342
//------------------------------------------------------------------------------------------------------------------------------
1343
// Median of three 16-bit float values: z is limited to the range spanned by x and y.
// Vector overloads work per component.
FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
    const FFX_MIN16_F lowerOfXY  = min(x, y);
    const FFX_MIN16_F higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
    const FFX_MIN16_F2 lowerOfXY  = min(x, y);
    const FFX_MIN16_F2 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
    const FFX_MIN16_F3 lowerOfXY  = min(x, y);
    const FFX_MIN16_F3 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
    const FFX_MIN16_F4 lowerOfXY  = min(x, y);
    const FFX_MIN16_F4 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
1359
//------------------------------------------------------------------------------------------------------------------------------
1360
// Median of three 16-bit signed integer values: z is limited to the range spanned by x and y.
// Vector overloads work per component.
FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
{
    const FFX_MIN16_I lowerOfXY  = min(x, y);
    const FFX_MIN16_I higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
{
    const FFX_MIN16_I2 lowerOfXY  = min(x, y);
    const FFX_MIN16_I2 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
{
    const FFX_MIN16_I3 lowerOfXY  = min(x, y);
    const FFX_MIN16_I3 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
{
    const FFX_MIN16_I4 lowerOfXY  = min(x, y);
    const FFX_MIN16_I4 higherOfXY = max(x, y);
    return max(lowerOfXY, min(higherOfXY, z));
}
1376
//------------------------------------------------------------------------------------------------------------------------------
1377
// 16-bit wrappers around the rcp intrinsic. Vector overloads work per component.
FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
{
	const FFX_MIN16_F inverse = rcp(x);
	return inverse;
}
FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
{
	const FFX_MIN16_F2 inverse = rcp(x);
	return inverse;
}
FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
{
	const FFX_MIN16_F3 inverse = rcp(x);
	return inverse;
}
FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
{
	const FFX_MIN16_F4 inverse = rcp(x);
	return inverse;
}
1393
//------------------------------------------------------------------------------------------------------------------------------
1394
// 16-bit wrappers around the rsqrt intrinsic. Vector overloads work per component.
FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
{
	const FFX_MIN16_F inverseRoot = rsqrt(x);
	return inverseRoot;
}
FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
{
	const FFX_MIN16_F2 inverseRoot = rsqrt(x);
	return inverseRoot;
}
FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
{
	const FFX_MIN16_F3 inverseRoot = rsqrt(x);
	return inverseRoot;
}
FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
{
	const FFX_MIN16_F4 inverseRoot = rsqrt(x);
	return inverseRoot;
}
1410
//------------------------------------------------------------------------------------------------------------------------------
1411
// 16-bit wrappers around the saturate intrinsic. Vector overloads work per component.
FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
{
	const FFX_MIN16_F saturated = saturate(x);
	return saturated;
}
FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
{
	const FFX_MIN16_F2 saturated = saturate(x);
	return saturated;
}
FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
{
	const FFX_MIN16_F3 saturated = saturate(x);
	return saturated;
}
FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
{
	const FFX_MIN16_F4 saturated = saturate(x);
	return saturated;
}
1427
//------------------------------------------------------------------------------------------------------------------------------
1428
// Shifts a right by b bits with both operands cast to the signed 16-bit type.
// The result is cast back to the unsigned type. Vector overloads work per component.
FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
{
	const FFX_MIN16_I signedValue = FFX_MIN16_I(a);
	const FFX_MIN16_I shiftCount  = FFX_MIN16_I(b);
	return FFX_MIN16_U(signedValue >> shiftCount);
}
FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
{
	const FFX_MIN16_I2 signedValue = FFX_MIN16_I2(a);
	const FFX_MIN16_I2 shiftCount  = FFX_MIN16_I2(b);
	return FFX_MIN16_U2(signedValue >> shiftCount);
}
FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
{
	const FFX_MIN16_I3 signedValue = FFX_MIN16_I3(a);
	const FFX_MIN16_I3 shiftCount  = FFX_MIN16_I3(b);
	return FFX_MIN16_U3(signedValue >> shiftCount);
}
FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
{
	const FFX_MIN16_I4 signedValue = FFX_MIN16_I4(a);
	const FFX_MIN16_I4 shiftCount  = FFX_MIN16_I4(b);
	return FFX_MIN16_U4(signedValue >> shiftCount);
}
1444
#endif // FFX_HALF
1445

1446
//==============================================================================================================================
1447
//                                                         HLSL WAVE
1448
//==============================================================================================================================
1449
#if defined(FFX_WAVE)
1450
// Where 'x' must be a compile time literal.
1451
// Reads v from the lane whose index is this lane's index XORed with x.
FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1455
// Reads the 2-component v from the lane whose index is this lane's index XORed with x.
FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1459
// Reads the 3-component v from the lane whose index is this lane's index XORed with x.
FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1463
// Reads the 4-component v from the lane whose index is this lane's index XORed with x.
FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1467
// Reads the unsigned v from the lane whose index is this lane's index XORed with x.
// NOTE(review): the 2/3/4-component overloads also use the AWaveXorU1 name, unlike AWaveXorF1..F4;
// the name is kept so existing callers continue to resolve.
FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
{
    const FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1483

1484
#if FFX_HALF
1485
// Packs v into one 32-bit word, reads that word from the lane at (this lane index XOR x), and unpacks it.
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
{
    const FfxUInt32 packedLocal  = FFX_FLOAT16X2_TO_UINT32(v);
    const FfxUInt32 packedRemote = WaveReadLaneAt(packedLocal, WaveGetLaneIndex() ^ x);
    return FFX_UINT32_TO_FLOAT16X2(packedRemote);
}
1489
// Packs v into two 32-bit words, reads them from the lane at (this lane index XOR x), and unpacks them.
FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
{
    const FfxUInt32x2 packedLocal  = FFX_FLOAT16X4_TO_UINT32X2(v);
    const FfxUInt32x2 packedRemote = WaveReadLaneAt(packedLocal, WaveGetLaneIndex() ^ x);
    return FFX_UINT32X2_TO_FLOAT16X4(packedRemote);
}
1493
// Packs v into one 32-bit word, reads that word from the lane at (this lane index XOR x), and unpacks it.
FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
{
    const FfxUInt32 packedLocal  = FFX_UINT16X2_TO_UINT32(v);
    const FfxUInt32 packedRemote = WaveReadLaneAt(packedLocal, WaveGetLaneIndex() ^ x);
    return FFX_UINT32_TO_UINT16X2(packedRemote);
}
1497
// Packs v into two 32-bit words, reads them from the lane at (this lane index XOR x), and unpacks them.
//
// Uses the FFX_UINT16X4_TO_UINT32X2 / FFX_UINT32X2_TO_UINT16X4 macros defined in the FFX_HALF section
// of this file, mirroring ffxWaveXorFloat16x4. The previous body referenced AW4_FFX_UINT32 and
// FFX_UINT32_AW4, which are not defined alongside the other pack/unpack macros.
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
}
1501
#endif // FFX_HALF
1502
#endif // #if defined(FFX_WAVE)
1503

1504
Product

Resources

Company