Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
9899 views
1
// This file is part of the FidelityFX SDK.
2
//
3
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
4
//
5
// Permission is hereby granted, free of charge, to any person obtaining a copy
6
// of this software and associated documentation files (the "Software"), to deal
7
// in the Software without restriction, including without limitation the rights
8
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
// copies of the Software, and to permit persons to whom the Software is
10
// furnished to do so, subject to the following conditions:
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
// THE SOFTWARE.
21
22
/// Storage-class keyword for memory shared between the threads of a group.
///
/// Expands to the HLSL <c><i>groupshared</i></c> keyword.
///
/// @ingroup GPU
#define FFX_GROUPSHARED groupshared

/// Name of the compute memory barrier used by shared shader code.
///
/// Expands to <c><i>GroupMemoryBarrierWithGroupSync</i></c> without an argument list, so the
/// parentheses are supplied at the point of use.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync

/// Qualifier that lets code shared between CPU and GPU carry <c><i>static</i></c> markup on functions.
///
/// @ingroup GPU
#define FFX_STATIC static

/// Loop attribute requesting that the loop which follows is unrolled.
///
/// Expands to the HLSL <c><i>[unroll]</i></c> attribute.
///
/// @ingroup GPU
#define FFX_UNROLL [unroll]
41
42
/// A define for abstracting a 'greater than' comparison operator between two types.
///
/// Both arguments and the whole expansion are parenthesized so the macro can be combined with
/// other operators (for example <c><i>!FFX_GREATER_THAN(a, b)</i></c>) and accept compound
/// expressions as arguments without changing the order of evaluation.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) ((x) > (y))

/// A define for abstracting a 'greater than or equal' comparison operator between two types.
///
/// Arguments and expansion are parenthesized; see FFX_GREATER_THAN.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))

/// A define for abstracting a 'less than' comparison operator between two types.
///
/// Arguments and expansion are parenthesized so the macro composes safely with other operators.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) ((x) < (y))

/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
/// Arguments and expansion are parenthesized so the macro composes safely with other operators.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))

/// A define for abstracting an 'equal' comparison operator between two types.
///
/// Arguments and expansion are parenthesized so the macro composes safely with other operators.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) ((x) == (y))

/// A define for abstracting a 'not equal' comparison operator between two types.
///
/// Arguments and expansion are parenthesized so the macro composes safely with other operators.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) ((x) != (y))
71
72
/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(value) FfxFloat32(value)

/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxFloat32</i></c> type; presumably the widening to two
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(value) FfxFloat32(value)

/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxFloat32</i></c> type; presumably the widening to three
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(value) FfxFloat32(value)

/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxFloat32</i></c> type; presumably the widening to four
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(value) FfxFloat32(value)
91
92
/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(value) FfxUInt32(value)

/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxUInt32</i></c> type; presumably the widening to two
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(value) FfxUInt32(value)

/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxUInt32</i></c> type; presumably the widening to three
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(value) FfxUInt32(value)

/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxUInt32</i></c> type; presumably the widening to four
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(value) FfxUInt32(value)
111
112
/// Broadcast a scalar value to a 1-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(value) FfxInt32(value)

/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxInt32</i></c> type; presumably the widening to two
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(value) FfxInt32(value)

/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxInt32</i></c> type; presumably the widening to three
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(value) FfxInt32(value)

/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// NOTE(review): expands to the scalar <c><i>FfxInt32</i></c> type; presumably the widening to four
/// components happens implicitly where the macro is used - confirm.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(value) FfxInt32(value)
131
132
/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_F</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(value) FFX_MIN16_F(value)

/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_F</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(value) FFX_MIN16_F(value)

/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_F</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(value) FFX_MIN16_F(value)

/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_F</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(value) FFX_MIN16_F(value)
151
152
/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_U</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(value) FFX_MIN16_U(value)

/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_U</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(value) FFX_MIN16_U(value)

/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_U</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(value) FFX_MIN16_U(value)

/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_U</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(value) FFX_MIN16_U(value)
171
172
/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_I</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(value) FFX_MIN16_I(value)

/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_I</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(value) FFX_MIN16_I(value)

/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_I</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(value) FFX_MIN16_I(value)

/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
/// Forwards its argument to <c><i>FFX_MIN16_I</i></c>, which is defined outside this section.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(value) FFX_MIN16_I(value)
191
192
/// Pack two 32-bit floating point values into a single 32-bit value.
///
/// Each component of <c><i>value</i></c> is converted with the HLSL <c><i>f32tof16</i></c> intrinsic. The
/// converted X component is placed in the lower 16 bits and the converted Y component is shifted
/// into the upper 16 bits of the result.
///
/// @param [in] value               A 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A packed 32-bit value containing two 16-bit floating point values.
///
/// @ingroup HLSL
FfxUInt32 packHalf2x16(FfxFloat32x2 value)
{
    const FfxUInt32 lowHalf  = f32tof16(value.x);
    const FfxUInt32 highHalf = f32tof16(value.y) << 16;
    return lowHalf | highHalf;
}
208
209
/// Replicate a scalar into both components of a 2-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
    const FfxFloat32x2 splat = FfxFloat32x2(value, value);
    return splat;
}
221
222
/// Replicate a scalar into all components of a 3-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
    const FfxFloat32x3 splat = FfxFloat32x3(value, value, value);
    return splat;
}
234
235
/// Replicate a scalar into all components of a 4-dimensional floating point vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
    const FfxFloat32x4 splat = FfxFloat32x4(value, value, value, value);
    return splat;
}
247
248
/// Replicate a scalar into both components of a 2-dimensional signed integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
    const FfxInt32x2 splat = FfxInt32x2(value, value);
    return splat;
}
260
261
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// The result uses the signed vector type so that it matches the signed input and the
/// 2- and 4-dimensional signed overloads; negative inputs therefore stay negative.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x3 ffxBroadcast3(FfxInt32 value)
{
    return FfxInt32x3(value, value, value);
}
273
274
/// Replicate a scalar into all components of a 4-dimensional signed integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
    const FfxInt32x4 splat = FfxInt32x4(value, value, value, value);
    return splat;
}
286
287
/// Replicate a scalar into both components of a 2-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
    const FfxUInt32x2 splat = FfxUInt32x2(value, value);
    return splat;
}
299
300
/// Replicate a scalar into all components of a 3-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
    const FfxUInt32x3 splat = FfxUInt32x3(value, value, value);
    return splat;
}
312
313
/// Replicate a scalar into all components of a 4-dimensional unsigned integer vector.
///
/// @param [in] value               The value to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
    const FfxUInt32x4 splat = FfxUInt32x4(value, value, value, value);
    return splat;
}
325
326
/// Extract a run of bits from an unsigned integer.
///
/// Shifts <c><i>src</i></c> right by <c><i>off</i></c> and keeps the lowest <c><i>bits</i></c> bits of the result.
///
/// NOTE(review): the mask is built as <c><i>(1u << bits) - 1</i></c>; what that yields for
/// <c><i>bits</i></c> of 32 or more is not established here - confirm callers stay below 32.
///
/// @param [in] src                 The value to extract bits from.
/// @param [in] off                 The index of the lowest bit to extract.
/// @param [in] bits                The number of bits to extract.
///
/// @returns
/// The extracted bits, placed in the low bits of the result.
///
/// @ingroup HLSL
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
    const FfxUInt32 lowBitsMask = (1u << bits) - 1;
    const FfxUInt32 shifted     = src >> off;
    return shifted & lowBitsMask;
}
331
332
/// Merge two values bit-by-bit under control of a mask.
///
/// Every bit that is set in <c><i>mask</i></c> is taken from <c><i>ins</i></c>; every other bit is taken
/// from <c><i>src</i></c>.
///
/// @param [in] src                 The value supplying the bits where <c><i>mask</i></c> is clear.
/// @param [in] ins                 The value supplying the bits where <c><i>mask</i></c> is set.
/// @param [in] mask                The bit mask selecting between <c><i>ins</i></c> and <c><i>src</i></c>.
///
/// @returns
/// The merged value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
    const FfxUInt32 inserted = ins & mask;
    const FfxUInt32 retained = src & (~mask);
    return inserted | retained;
}
336
337
/// Replace the lowest bits of a value with the lowest bits of another.
///
/// Builds a mask covering the lowest <c><i>bits</i></c> bits, then takes those bits from <c><i>ins</i></c>
/// and all remaining bits from <c><i>src</i></c>.
///
/// NOTE(review): the mask is built as <c><i>(1u << bits) - 1</i></c>; what that yields for
/// <c><i>bits</i></c> of 32 or more is not established here - confirm callers stay below 32.
///
/// @param [in] src                 The value supplying the bits outside the mask.
/// @param [in] ins                 The value supplying the bits inside the mask.
/// @param [in] bits                The number of low bits to take from <c><i>ins</i></c>.
///
/// @returns
/// The merged value.
///
/// @ingroup HLSL
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
    const FfxUInt32 lowBitsMask = (1u << bits) - 1;
    const FfxUInt32 inserted    = ins & lowBitsMask;
    const FfxUInt32 retained    = src & (~lowBitsMask);
    return inserted | retained;
}
342
343
/// Reinterpret the bits of a floating point scalar as an unsigned integer.
///
/// Forwards to the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
    const FfxUInt32 rawBits = asuint(x);
    return rawBits;
}
355
356
/// Reinterpret the bits of a 2-dimensional floating point vector as unsigned integers.
///
/// Forwards to the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 2-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
    const FfxUInt32x2 rawBits = asuint(x);
    return rawBits;
}
368
369
/// Reinterpret the bits of a 3-dimensional floating point vector as unsigned integers.
///
/// Forwards to the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 3-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
    const FfxUInt32x3 rawBits = asuint(x);
    return rawBits;
}
381
382
/// Reinterpret the bits of a 4-dimensional floating point vector as unsigned integers.
///
/// Forwards to the HLSL <c><i>asuint</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 4-dimensional unsigned integer vector.
///
/// @ingroup HLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
    const FfxUInt32x4 rawBits = asuint(x);
    return rawBits;
}
394
395
/// Reinterpret the bits of an unsigned integer scalar as a floating-point number.
///
/// Forwards to the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
    const FfxFloat32 reinterpreted = asfloat(x);
    return reinterpreted;
}
407
408
/// Reinterpret the bits of a 2-dimensional unsigned integer vector as floating-point numbers.
///
/// Forwards to the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 2-dimensional floating-point vector.
///
/// @ingroup HLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
    const FfxFloat32x2 reinterpreted = asfloat(x);
    return reinterpreted;
}
420
421
/// Reinterpret the bits of a 3-dimensional unsigned integer vector as floating-point numbers.
///
/// Forwards to the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 3-dimensional floating-point vector.
///
/// @ingroup HLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
    const FfxFloat32x3 reinterpreted = asfloat(x);
    return reinterpreted;
}
433
434
/// Reinterpret the bits of a 4-dimensional unsigned integer vector as floating-point numbers.
///
/// Forwards to the HLSL <c><i>asfloat</i></c> intrinsic.
///
/// @param [in] x                   The input value.
///
/// @returns
/// The input interpreted as a 4-dimensional floating-point vector.
///
/// @ingroup HLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
    const FfxFloat32x4 reinterpreted = asfloat(x);
    return reinterpreted;
}
446
447
/// Linearly interpolate between two scalar values.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
    const FfxFloat32 blended = lerp(x, y, t);
    return blended;
}
466
467
/// Linearly interpolate between two 2-dimensional vectors using a single scalar weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The scalar weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
    const FfxFloat32x2 blended = lerp(x, y, t);
    return blended;
}
486
487
/// Linearly interpolate between two 2-dimensional vectors using a per-component weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The per-component weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
    const FfxFloat32x2 blended = lerp(x, y, t);
    return blended;
}
506
507
/// Linearly interpolate between two 3-dimensional vectors using a single scalar weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The scalar weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
    const FfxFloat32x3 blended = lerp(x, y, t);
    return blended;
}
526
527
/// Linearly interpolate between two 3-dimensional vectors using a per-component weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The per-component weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
    const FfxFloat32x3 blended = lerp(x, y, t);
    return blended;
}
546
547
/// Linearly interpolate between two 4-dimensional vectors using a single scalar weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The scalar weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
    const FfxFloat32x4 blended = lerp(x, y, t);
    return blended;
}
566
567
/// Linearly interpolate between two 4-dimensional vectors using a per-component weight.
///
/// Forwards to the HLSL <c><i>lerp</i></c> intrinsic, which implements the following math:
///
///     (1 - t) * x + t * y
///
/// @param [in] x                   The first value to lerp between.
/// @param [in] y                   The second value to lerp between.
/// @param [in] t                   The per-component weight deciding how much of <c><i>x</i></c> and how much of <c><i>y</i></c> to use.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
    const FfxFloat32x4 blended = lerp(x, y, t);
    return blended;
}
586
587
/// Clamp a scalar to the [0..1] range.
///
/// Forwards to the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x                   The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
    const FfxFloat32 clamped = saturate(x);
    return clamped;
}
599
600
/// Clamp each component of a 2-dimensional vector to the [0..1] range.
///
/// Forwards to the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x                   The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
    const FfxFloat32x2 clamped = saturate(x);
    return clamped;
}
612
613
/// Clamp each component of a 3-dimensional vector to the [0..1] range.
///
/// Forwards to the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x                   The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
    const FfxFloat32x3 clamped = saturate(x);
    return clamped;
}
625
626
/// Clamp each component of a 4-dimensional vector to the [0..1] range.
///
/// Forwards to the HLSL <c><i>saturate</i></c> intrinsic.
///
/// @param [in] x                   The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
    const FfxFloat32x4 clamped = saturate(x);
    return clamped;
}
638
639
/// Compute the fractional part of a scalar value.
///
/// Evaluates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the HLSL intrinsic. The HLSL
/// <c><i>frac</i></c> intrinsic is intentionally not used.
///
/// NOTE(review): presumably intended to map to a single fract instruction on GCN/RDNA hardware - confirm
/// which one.
///
/// @param [in] x                   The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
    const FfxFloat32 wholePart = floor(x);
    return x - wholePart;
}
657
658
/// Compute the fractional part of each component of a 2-dimensional vector.
///
/// Evaluates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the HLSL intrinsic. The HLSL
/// <c><i>frac</i></c> intrinsic is intentionally not used.
///
/// NOTE(review): presumably intended to map to a single fract instruction per component on GCN/RDNA
/// hardware - confirm which one.
///
/// @param [in] x                   The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
    const FfxFloat32x2 wholePart = floor(x);
    return x - wholePart;
}
676
677
/// Compute the fractional part of each component of a 3-dimensional vector.
///
/// Evaluates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the HLSL intrinsic. The HLSL
/// <c><i>frac</i></c> intrinsic is intentionally not used.
///
/// NOTE(review): presumably intended to map to a single fract instruction per component on GCN/RDNA
/// hardware - confirm which one.
///
/// @param [in] x                   The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
    const FfxFloat32x3 wholePart = floor(x);
    return x - wholePart;
}
695
696
/// Compute the fractional part of each component of a 4-dimensional vector.
///
/// Evaluates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the HLSL intrinsic. The HLSL
/// <c><i>frac</i></c> intrinsic is intentionally not used.
///
/// NOTE(review): presumably intended to map to a single fract instruction per component on GCN/RDNA
/// hardware - confirm which one.
///
/// @param [in] x                   The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
    const FfxFloat32x4 wholePart = floor(x);
    return x - wholePart;
}
714
715
/// Return the largest of three scalar floating point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    const FfxFloat32 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
731
732
/// Return the component-wise largest of three 2-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    const FfxFloat32x2 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
748
749
/// Return the component-wise largest of three 3-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    const FfxFloat32x3 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
765
766
/// Return the component-wise largest of three 4-dimensional floating point vectors.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    const FfxFloat32x4 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
782
783
/// Return the largest of three scalar unsigned integer values.
///
/// NOTE(review): presumably intended to compile down to a single unsigned max3 operation
/// (<c><i>V_MAX3_U32</i></c>) on GCN/RDNA hardware - confirm.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    const FfxUInt32 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
799
800
/// Return the component-wise largest of three 2-dimensional unsigned integer vectors.
///
/// NOTE(review): presumably intended to compile down to a single unsigned max3 operation
/// (<c><i>V_MAX3_U32</i></c>) per component on GCN/RDNA hardware - confirm.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    const FfxUInt32x2 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
816
817
/// Return the component-wise largest of three 3-dimensional unsigned integer vectors.
///
/// NOTE(review): presumably intended to compile down to a single unsigned max3 operation
/// (<c><i>V_MAX3_U32</i></c>) per component on GCN/RDNA hardware - confirm.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    const FfxUInt32x3 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
833
834
/// Return the component-wise largest of three 4-dimensional unsigned integer vectors.
///
/// NOTE(review): presumably intended to compile down to a single unsigned max3 operation
/// (<c><i>V_MAX3_U32</i></c>) per component on GCN/RDNA hardware - confirm.
///
/// @param [in] x                   The first value to include in the max calculation.
/// @param [in] y                   The second value to include in the max calculation.
/// @param [in] z                   The third value to include in the max calculation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    const FfxUInt32x4 largerOfYZ = max(y, z);
    return max(x, largerOfYZ);
}
850
851
/// Return the median of three scalar floating point values.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> first, then limits <c><i>z</i></c> to the range they span.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    const FfxFloat32 smallerOfXY = min(x, y);
    const FfxFloat32 largerOfXY  = max(x, y);
    return max(smallerOfXY, min(largerOfXY, z));
}
867
868
/// Return the component-wise median of three 2-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> first, then limits <c><i>z</i></c> to the range they span.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    const FfxFloat32x2 smallerOfXY = min(x, y);
    const FfxFloat32x2 largerOfXY  = max(x, y);
    return max(smallerOfXY, min(largerOfXY, z));
}
884
885
/// Return the component-wise median of three 3-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> first, then limits <c><i>z</i></c> to the range they span.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    const FfxFloat32x3 smallerOfXY = min(x, y);
    const FfxFloat32x3 largerOfXY  = max(x, y);
    return max(smallerOfXY, min(largerOfXY, z));
}
901
902
/// Return the component-wise median of three 4-dimensional floating point vectors.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> first, then limits <c><i>z</i></c> to the range they span.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
/// GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    const FfxFloat32x4 smallerOfXY = min(x, y);
    const FfxFloat32x4 largerOfXY  = max(x, y);
    return max(smallerOfXY, min(largerOfXY, z));
}
918
919
/// Return the median of three scalar signed integer values.
///
/// Orders <c><i>x</i></c> and <c><i>y</i></c> first, then limits <c><i>z</i></c> to the range they span.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
    const FfxInt32 smallerOfXY = min(x, y);
    const FfxInt32 largerOfXY  = max(x, y);
    return max(smallerOfXY, min(largerOfXY, z));
}
937
938
/// Compute the component-wise median of three signed integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
    // Order x and y first; the median is the larger of the low value and min(high, z).
    FfxInt32x2 lowXY  = min(x, y);
    FfxInt32x2 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}
956
957
/// Compute the component-wise median of three signed integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
    // Order x and y first; the median is the larger of the low value and min(high, z).
    FfxInt32x3 lowXY  = min(x, y);
    FfxInt32x3 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}
973
974
/// Compute the component-wise median of three signed integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the median calculation.
/// @param [in] y                   The second value to include in the median calculation.
/// @param [in] z                   The third value to include in the median calculation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
    // Order x and y first; the median is the larger of the low value and min(high, z).
    FfxInt32x4 lowXY  = min(x, y);
    FfxInt32x4 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}
990
991
/// Compute the minimum of three floating-point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
    FfxFloat32 minYZ = min(y, z);
    return min(x, minYZ);
}
1007
1008
/// Compute the component-wise minimum of three floating-point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
    FfxFloat32x2 minYZ = min(y, z);
    return min(x, minYZ);
}
1024
1025
/// Compute the component-wise minimum of three floating-point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
    FfxFloat32x3 minYZ = min(y, z);
    return min(x, minYZ);
}
1041
1042
/// Compute the component-wise minimum of three floating-point values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
    FfxFloat32x4 minYZ = min(y, z);
    return min(x, minYZ);
}
1058
1059
/// Compute the minimum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
    FfxUInt32 minYZ = min(y, z);
    return min(x, minYZ);
}
1075
1076
/// Compute the component-wise minimum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
    FfxUInt32x2 minYZ = min(y, z);
    return min(x, minYZ);
}
1092
1093
/// Compute the component-wise minimum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
    FfxUInt32x3 minYZ = min(y, z);
    return min(x, minYZ);
}
1109
1110
/// Compute the component-wise minimum of three unsigned integer values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x                   The first value to include in the min calculation.
/// @param [in] y                   The second value to include in the min calculation.
/// @param [in] z                   The third value to include in the min calculation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
    FfxUInt32x4 minYZ = min(y, z);
    return min(x, minYZ);
}
1126
1127
1128
// Right shift performed on signed values: both operands are cast to FfxInt32,
// shifted, and the result is cast back to FfxUInt32.
FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
    FfxInt32 signedValue = FfxInt32(a);
    FfxInt32 shiftCount  = FfxInt32(b);
    return FfxUInt32(signedValue >> shiftCount);
}
1132
1133
//==============================================================================================================================
1134
// HLSL HALF
1135
//==============================================================================================================================
1136
#if FFX_HALF
1137
1138
//==============================================================================================================================
1139
// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/

// Unpack two 16-bit float bit patterns from one 32-bit word (.x from the low half, .y from the high half).
FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
{
    // Keep the f16tof32-on-uint2 shape: the comment above asks for this exact pattern.
    FfxFloat32x2 unpacked = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
    return FFX_MIN16_F2(unpacked);
}

// Unpack four 16-bit floats from two 32-bit words (x.x supplies .xy, x.y supplies .zw).
FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
    FFX_MIN16_F2 firstPair  = ffxUint32ToFloat16x2(x.x);
    FFX_MIN16_F2 secondPair = ffxUint32ToFloat16x2(x.y);
    return FFX_MIN16_F4(firstPair, secondPair);
}

// Split one 32-bit word into its low (.x) and high (.y) 16-bit halves.
FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
{
    FfxUInt32x2 halves = FfxUInt32x2(x & 0xFFFF, x >> 16);
    return FFX_MIN16_U2(halves);
}

// Split two 32-bit words into four 16-bit values (x.x supplies .xy, x.y supplies .zw).
FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
{
    FFX_MIN16_U2 firstPair  = ffxUint32ToUint16x2(x.x);
    FFX_MIN16_U2 secondPair = ffxUint32ToUint16x2(x.y);
    return FFX_MIN16_U4(firstPair, secondPair);
}

// Convenience wrappers that cast the argument to the expected 32-bit type before unpacking.
#define FFX_UINT32_TO_FLOAT16X2(v)   ffxUint32ToFloat16x2(FfxUInt32(v))
#define FFX_UINT32X2_TO_FLOAT16X4(v) ffxUint32x2ToFloat16x4(FfxUInt32x2(v))
#define FFX_UINT32_TO_UINT16X2(v)    ffxUint32ToUint16x2(FfxUInt32(v))
#define FFX_UINT32X2_TO_UINT16X4(v)  ffxUint32x2ToUint16x4(FfxUInt32x2(v))
1163
//------------------------------------------------------------------------------------------------------------------------------
1164
// Pack two 16-bit floats into one 32-bit word (.x in the low half, .y shifted into the high half).
FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
{
    FfxUInt32 lowBits  = f32tof16(x.x);
    FfxUInt32 highBits = f32tof16(x.y) << 16;
    return lowBits + highBits;
}

// Pack four 16-bit floats into two 32-bit words (.xy into the first word, .zw into the second).
FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
{
    FfxUInt32 firstWord  = FFX_MIN16_F2ToUint32(x.xy);
    FfxUInt32 secondWord = FFX_MIN16_F2ToUint32(x.zw);
    return FfxUInt32x2(firstWord, secondWord);
}

// Pack two 16-bit unsigned values into one 32-bit word (.x in the low half, .y shifted into the high half).
FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)
{
    FfxUInt32 lowBits  = FfxUInt32(x.x);
    FfxUInt32 highBits = FfxUInt32(x.y) << 16;
    return lowBits + highBits;
}

// Pack four 16-bit unsigned values into two 32-bit words (.xy into the first word, .zw into the second).
FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
{
    FfxUInt32 firstWord  = FFX_MIN16_U2ToUint32(x.xy);
    FfxUInt32 secondWord = FFX_MIN16_U2ToUint32(x.zw);
    return FfxUInt32x2(firstWord, secondWord);
}

// Convenience wrappers that cast the argument to the expected 16-bit vector type before packing.
#define FFX_FLOAT16X2_TO_UINT32(v)   FFX_MIN16_F2ToUint32(FFX_MIN16_F2(v))
#define FFX_FLOAT16X4_TO_UINT32X2(v) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(v))
#define FFX_UINT16X2_TO_UINT32(v)    FFX_MIN16_U2ToUint32(FFX_MIN16_U2(v))
#define FFX_UINT16X4_TO_UINT32X2(v)  FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(v))
1184
1185
#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
// 16-bit casts are enabled: every width forwards to asuint16().
#define FFX_TO_UINT16(v)   asuint16(v)
#define FFX_TO_UINT16X2(v) asuint16(v)
#define FFX_TO_UINT16X3(v) asuint16(v)
#define FFX_TO_UINT16X4(v) asuint16(v)
#else
// Fallback: go through f32tof16() one component at a time.
// The vector forms expand their argument once per component.
#define FFX_TO_UINT16(v)   FFX_MIN16_U(f32tof16(FfxFloat32(v)))
#define FFX_TO_UINT16X2(v) FFX_MIN16_U2(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y))
#define FFX_TO_UINT16X3(v) FFX_MIN16_U3(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y), FFX_TO_UINT16((v).z))
#define FFX_TO_UINT16X4(v) FFX_MIN16_U4(FFX_TO_UINT16((v).x), FFX_TO_UINT16((v).y), FFX_TO_UINT16((v).z), FFX_TO_UINT16((v).w))
#endif  // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
1196
1197
#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
// 16-bit casts are enabled: every width forwards to asfloat16().
#define FFX_TO_FLOAT16(v)   asfloat16(v)
#define FFX_TO_FLOAT16X2(v) asfloat16(v)
#define FFX_TO_FLOAT16X3(v) asfloat16(v)
#define FFX_TO_FLOAT16X4(v) asfloat16(v)
#else
// Fallback: go through f16tof32() one component at a time.
// The vector forms expand their argument once per component.
#define FFX_TO_FLOAT16(v)   FFX_MIN16_F(f16tof32(FfxUInt32(v)))
#define FFX_TO_FLOAT16X2(v) FFX_MIN16_F2(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y))
#define FFX_TO_FLOAT16X3(v) FFX_MIN16_F3(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y), FFX_TO_FLOAT16((v).z))
#define FFX_TO_FLOAT16X4(v) FFX_MIN16_F4(FFX_TO_FLOAT16((v).x), FFX_TO_FLOAT16((v).y), FFX_TO_FLOAT16((v).z), FFX_TO_FLOAT16((v).w))
#endif  // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
1208
1209
//==============================================================================================================================
1210
// Broadcast helpers. Every width expands to a cast to the scalar 16-bit type.
// NOTE(review): presumably relies on HLSL promoting the scalar to a vector at the use site - confirm.
#define FFX_BROADCAST_FLOAT16(v)   FFX_MIN16_F(v)
#define FFX_BROADCAST_FLOAT16X2(v) FFX_MIN16_F(v)
#define FFX_BROADCAST_FLOAT16X3(v) FFX_MIN16_F(v)
#define FFX_BROADCAST_FLOAT16X4(v) FFX_MIN16_F(v)

//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_INT16(v)   FFX_MIN16_I(v)
#define FFX_BROADCAST_INT16X2(v) FFX_MIN16_I(v)
#define FFX_BROADCAST_INT16X3(v) FFX_MIN16_I(v)
#define FFX_BROADCAST_INT16X4(v) FFX_MIN16_I(v)

//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_UINT16(v)   FFX_MIN16_U(v)
#define FFX_BROADCAST_UINT16X2(v) FFX_MIN16_U(v)
#define FFX_BROADCAST_UINT16X3(v) FFX_MIN16_U(v)
#define FFX_BROADCAST_UINT16X4(v) FFX_MIN16_U(v)
1226
1227
//==============================================================================================================================
1228
// Absolute value on 16-bit unsigned storage: the input is cast to the matching signed type,
// abs() is applied, and the result is cast back to unsigned.
FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
{
    FFX_MIN16_I asSigned = FFX_MIN16_I(a);
    return FFX_MIN16_U(abs(asSigned));
}

FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
{
    FFX_MIN16_I2 asSigned = FFX_MIN16_I2(a);
    return FFX_MIN16_U2(abs(asSigned));
}

FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
{
    FFX_MIN16_I3 asSigned = FFX_MIN16_I3(a);
    return FFX_MIN16_U3(abs(asSigned));
}

FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
{
    FFX_MIN16_I4 asSigned = FFX_MIN16_I4(a);
    return FFX_MIN16_U4(abs(asSigned));
}
1244
//------------------------------------------------------------------------------------------------------------------------------
1245
// Clamp x to the range [n, m]: the upper bound m is applied first, then the lower bound n.
FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
{
    FFX_MIN16_F capped = min(x, m);
    return max(n, capped);
}

FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
{
    FFX_MIN16_F2 capped = min(x, m);
    return max(n, capped);
}

FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
{
    FFX_MIN16_F3 capped = min(x, m);
    return max(n, capped);
}

FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
{
    FFX_MIN16_F4 capped = min(x, m);
    return max(n, capped);
}
1261
//------------------------------------------------------------------------------------------------------------------------------
1262
// V_FRACT_F16 (note DX frac() is different).
// Fractional part computed as x minus floor(x).
FFX_MIN16_F ffxFract(FFX_MIN16_F x)
{
    FFX_MIN16_F wholePart = floor(x);
    return x - wholePart;
}

FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
{
    FFX_MIN16_F2 wholePart = floor(x);
    return x - wholePart;
}

FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
{
    FFX_MIN16_F3 wholePart = floor(x);
    return x - wholePart;
}

FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
{
    FFX_MIN16_F4 wholePart = floor(x);
    return x - wholePart;
}
1279
//------------------------------------------------------------------------------------------------------------------------------
1280
// 16-bit wrappers around the lerp() intrinsic. Each vector width comes in two forms:
// one taking a scalar blend factor and one taking a per-component blend factor.
FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
{
    FFX_MIN16_F blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
{
    FFX_MIN16_F2 blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
{
    FFX_MIN16_F2 blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
{
    FFX_MIN16_F3 blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
{
    FFX_MIN16_F3 blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
{
    FFX_MIN16_F4 blended = lerp(x, y, a);
    return blended;
}

FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
{
    FFX_MIN16_F4 blended = lerp(x, y, a);
    return blended;
}
1308
//------------------------------------------------------------------------------------------------------------------------------
1309
// Maximum of three 16-bit float values: max(y, z) is taken first, then combined with x.
FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
    FFX_MIN16_F maxYZ = max(y, z);
    return max(x, maxYZ);
}

FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
    FFX_MIN16_F2 maxYZ = max(y, z);
    return max(x, maxYZ);
}

FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
    FFX_MIN16_F3 maxYZ = max(y, z);
    return max(x, maxYZ);
}

FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
    FFX_MIN16_F4 maxYZ = max(y, z);
    return max(x, maxYZ);
}
1325
//------------------------------------------------------------------------------------------------------------------------------
1326
// Minimum of three 16-bit float values: min(y, z) is taken first, then combined with x.
FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
    FFX_MIN16_F minYZ = min(y, z);
    return min(x, minYZ);
}

FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
    FFX_MIN16_F2 minYZ = min(y, z);
    return min(x, minYZ);
}

FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
    FFX_MIN16_F3 minYZ = min(y, z);
    return min(x, minYZ);
}

FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
    FFX_MIN16_F4 minYZ = min(y, z);
    return min(x, minYZ);
}
1342
//------------------------------------------------------------------------------------------------------------------------------
1343
// Median of three 16-bit float values: x and y are ordered first, then the median is
// the larger of the low value and min(high, z).
FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
    FFX_MIN16_F lowXY  = min(x, y);
    FFX_MIN16_F highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
    FFX_MIN16_F2 lowXY  = min(x, y);
    FFX_MIN16_F2 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
    FFX_MIN16_F3 lowXY  = min(x, y);
    FFX_MIN16_F3 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
    FFX_MIN16_F4 lowXY  = min(x, y);
    FFX_MIN16_F4 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}
1359
//------------------------------------------------------------------------------------------------------------------------------
1360
// Median of three 16-bit signed integer values: x and y are ordered first, then the median is
// the larger of the low value and min(high, z).
FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
{
    FFX_MIN16_I lowXY  = min(x, y);
    FFX_MIN16_I highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
{
    FFX_MIN16_I2 lowXY  = min(x, y);
    FFX_MIN16_I2 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
{
    FFX_MIN16_I3 lowXY  = min(x, y);
    FFX_MIN16_I3 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}

FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
{
    FFX_MIN16_I4 lowXY  = min(x, y);
    FFX_MIN16_I4 highXY = max(x, y);
    return max(lowXY, min(highXY, z));
}
1376
//------------------------------------------------------------------------------------------------------------------------------
1377
// 16-bit wrappers around the rcp() intrinsic.
FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
{
    FFX_MIN16_F reciprocal = rcp(x);
    return reciprocal;
}

FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
{
    FFX_MIN16_F2 reciprocal = rcp(x);
    return reciprocal;
}

FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
{
    FFX_MIN16_F3 reciprocal = rcp(x);
    return reciprocal;
}

FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
{
    FFX_MIN16_F4 reciprocal = rcp(x);
    return reciprocal;
}
1393
//------------------------------------------------------------------------------------------------------------------------------
1394
// 16-bit wrappers around the rsqrt() intrinsic.
FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
{
    FFX_MIN16_F inverseRoot = rsqrt(x);
    return inverseRoot;
}

FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
{
    FFX_MIN16_F2 inverseRoot = rsqrt(x);
    return inverseRoot;
}

FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
{
    FFX_MIN16_F3 inverseRoot = rsqrt(x);
    return inverseRoot;
}

FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
{
    FFX_MIN16_F4 inverseRoot = rsqrt(x);
    return inverseRoot;
}
1410
//------------------------------------------------------------------------------------------------------------------------------
1411
// 16-bit wrappers around the saturate() intrinsic.
FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
{
    FFX_MIN16_F saturated = saturate(x);
    return saturated;
}

FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
{
    FFX_MIN16_F2 saturated = saturate(x);
    return saturated;
}

FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
{
    FFX_MIN16_F3 saturated = saturate(x);
    return saturated;
}

FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
{
    FFX_MIN16_F4 saturated = saturate(x);
    return saturated;
}
1427
//------------------------------------------------------------------------------------------------------------------------------
1428
// Right shift performed on signed 16-bit values: both operands are cast to the signed type,
// shifted, and the result is cast back to unsigned.
FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
{
    FFX_MIN16_I signedValue = FFX_MIN16_I(a);
    FFX_MIN16_I shiftCount  = FFX_MIN16_I(b);
    return FFX_MIN16_U(signedValue >> shiftCount);
}

FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
{
    FFX_MIN16_I2 signedValue = FFX_MIN16_I2(a);
    FFX_MIN16_I2 shiftCount  = FFX_MIN16_I2(b);
    return FFX_MIN16_U2(signedValue >> shiftCount);
}

FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
{
    FFX_MIN16_I3 signedValue = FFX_MIN16_I3(a);
    FFX_MIN16_I3 shiftCount  = FFX_MIN16_I3(b);
    return FFX_MIN16_U3(signedValue >> shiftCount);
}

FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
{
    FFX_MIN16_I4 signedValue = FFX_MIN16_I4(a);
    FFX_MIN16_I4 shiftCount  = FFX_MIN16_I4(b);
    return FFX_MIN16_U4(signedValue >> shiftCount);
}
1444
#endif // FFX_HALF
1445
1446
//==============================================================================================================================
1447
// HLSL WAVE
1448
//==============================================================================================================================
1449
#if defined(FFX_WAVE)
1450
// Where 'x' must be a compile time literal.
// Each helper returns v as held by the lane whose index is this lane's index XOR x.
FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1467
// Unsigned variants: return v as held by the lane whose index is this lane's index XOR x.
// NOTE(review): the vector overloads reuse the U1 suffix, unlike AWaveXorF2..F4; the names are
// kept as-is so existing callers keep resolving.
FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}

FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return WaveReadLaneAt(v, partnerLane);
}
1483
1484
#if FFX_HALF
1485
// 16-bit variants: the value is packed into 32-bit words, read from the lane whose index is
// this lane's index XOR x, and unpacked again.
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), partnerLane));
}

FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), partnerLane));
}

FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
{
    FfxUInt32 partnerLane = WaveGetLaneIndex() ^ x;
    return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), partnerLane));
}
1497
// Return v as held by the lane whose index is this lane's index XOR x.
// The four 16-bit values are packed into two 32-bit words for the cross-lane read and
// unpacked afterwards, using the same pack/unpack macros as the other 16-bit wave helpers
// (the previous AW4_FFX_UINT32 / FFX_UINT32_AW4 names are not defined in this header).
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
}
1501
#endif // FFX_HALF
1502
#endif // #if defined(FFX_WAVE)
1503
1504