Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
4574 views
1
/****************************************************************************
2
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
* @file blend_jit.cpp
24
*
25
* @brief Implementation of the blend jitter
26
*
27
* Notes:
28
*
29
******************************************************************************/
30
#include "jit_pch.hpp"
31
#include "builder.h"
32
#include "jit_api.h"
33
#include "blend_jit.h"
34
#include "gen_state_llvm.h"
35
#include "functionpasses/passes.h"
36
37
#include "util/compiler.h"
38
39
// Format components whose bit width is <= QUANTIZE_THRESHOLD are quantized (see BlendJit::Quantize)
40
#define QUANTIZE_THRESHOLD 2
41
42
using namespace llvm;
43
using namespace SwrJit;
44
45
//////////////////////////////////////////////////////////////////////////
46
/// Interface to Jitting a blend shader
47
//////////////////////////////////////////////////////////////////////////
48
struct BlendJit : public Builder
49
{
50
/// Forwards the JIT manager to the Builder base class.
BlendJit(JitManager* pJitMgr) : Builder(pJitMgr) {}
51
52
template <bool Color, bool Alpha>
53
void GenerateBlendFactor(SWR_BLEND_FACTOR factor,
54
Value* constColor[4],
55
Value* src[4],
56
Value* src1[4],
57
Value* dst[4],
58
Value* result[4])
59
{
60
Value* out[4];
61
62
switch (factor)
63
{
64
case BLENDFACTOR_ONE:
65
out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
66
break;
67
case BLENDFACTOR_SRC_COLOR:
68
out[0] = src[0];
69
out[1] = src[1];
70
out[2] = src[2];
71
out[3] = src[3];
72
break;
73
case BLENDFACTOR_SRC_ALPHA:
74
out[0] = out[1] = out[2] = out[3] = src[3];
75
break;
76
case BLENDFACTOR_DST_ALPHA:
77
out[0] = out[1] = out[2] = out[3] = dst[3];
78
break;
79
case BLENDFACTOR_DST_COLOR:
80
out[0] = dst[0];
81
out[1] = dst[1];
82
out[2] = dst[2];
83
out[3] = dst[3];
84
break;
85
case BLENDFACTOR_SRC_ALPHA_SATURATE:
86
out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
87
out[3] = VIMMED1(1.0f);
88
break;
89
case BLENDFACTOR_CONST_COLOR:
90
out[0] = constColor[0];
91
out[1] = constColor[1];
92
out[2] = constColor[2];
93
out[3] = constColor[3];
94
break;
95
case BLENDFACTOR_CONST_ALPHA:
96
out[0] = out[1] = out[2] = out[3] = constColor[3];
97
break;
98
case BLENDFACTOR_SRC1_COLOR:
99
out[0] = src1[0];
100
out[1] = src1[1];
101
out[2] = src1[2];
102
out[3] = src1[3];
103
break;
104
case BLENDFACTOR_SRC1_ALPHA:
105
out[0] = out[1] = out[2] = out[3] = src1[3];
106
break;
107
case BLENDFACTOR_ZERO:
108
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
109
break;
110
case BLENDFACTOR_INV_SRC_COLOR:
111
out[0] = FSUB(VIMMED1(1.0f), src[0]);
112
out[1] = FSUB(VIMMED1(1.0f), src[1]);
113
out[2] = FSUB(VIMMED1(1.0f), src[2]);
114
out[3] = FSUB(VIMMED1(1.0f), src[3]);
115
break;
116
case BLENDFACTOR_INV_SRC_ALPHA:
117
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
118
break;
119
case BLENDFACTOR_INV_DST_ALPHA:
120
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
121
break;
122
case BLENDFACTOR_INV_DST_COLOR:
123
out[0] = FSUB(VIMMED1(1.0f), dst[0]);
124
out[1] = FSUB(VIMMED1(1.0f), dst[1]);
125
out[2] = FSUB(VIMMED1(1.0f), dst[2]);
126
out[3] = FSUB(VIMMED1(1.0f), dst[3]);
127
break;
128
case BLENDFACTOR_INV_CONST_COLOR:
129
out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
130
out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
131
out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
132
out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
133
break;
134
case BLENDFACTOR_INV_CONST_ALPHA:
135
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
136
break;
137
case BLENDFACTOR_INV_SRC1_COLOR:
138
out[0] = FSUB(VIMMED1(1.0f), src1[0]);
139
out[1] = FSUB(VIMMED1(1.0f), src1[1]);
140
out[2] = FSUB(VIMMED1(1.0f), src1[2]);
141
out[3] = FSUB(VIMMED1(1.0f), src1[3]);
142
break;
143
case BLENDFACTOR_INV_SRC1_ALPHA:
144
out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
145
break;
146
default:
147
SWR_INVALID("Unsupported blend factor: %d", factor);
148
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
149
break;
150
}
151
152
if (Color)
153
{
154
result[0] = out[0];
155
result[1] = out[1];
156
result[2] = out[2];
157
}
158
159
if (Alpha)
160
{
161
result[3] = out[3];
162
}
163
}
164
165
/// @brief Clamps all four channels of a color to the range of a normalized format.
///
/// Only the type of the format's first component is consulted: UNORM clamps to
/// [0, 1], SNORM clamps to [-1, 1]. SWR_TYPE_UNKNOWN is reported through
/// SWR_INVALID, and every other type leaves the color unchanged.
///
/// @param format - render target format that decides the clamp range
/// @param src    - color to clamp in place
void Clamp(SWR_FORMAT format, Value* src[4])
{
    const SWR_TYPE type = GetFormatInfo(format).type[0];

    if (type == SWR_TYPE_UNKNOWN)
    {
        SWR_INVALID("Unsupported format type: %d", type);
        return;
    }

    float lowerBound;
    if (type == SWR_TYPE_UNORM)
    {
        lowerBound = 0.0f;
    }
    else if (type == SWR_TYPE_SNORM)
    {
        lowerBound = -1.0f;
    }
    else
    {
        // nothing to clamp for the remaining types
        return;
    }

    for (uint32_t i = 0; i < 4; ++i)
    {
        src[i] = VMINPS(VMAXPS(src[i], VIMMED1(lowerBound)), VIMMED1(1.0f));
    }
}
193
194
/// @brief Fills channels that the format does not provide with the format defaults.
///
/// A channel counts as provided when one of the format's numComps components
/// is swizzled into it. The default is built as an integer vector and bitcast
/// to the SIMD float type, i.e. it is used as a raw bit pattern.
///
/// @param format - format whose swizzle and defaults are used
/// @param src    - color to patch in place
void ApplyDefaults(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);

    // bit n is set when some component of the format lands in channel n
    uint32_t providedMask = 0;
    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        providedMask |= (1u << info.swizzle[comp]);
    }

    for (uint32_t chan = 0; chan < 4; ++chan)
    {
        if (providedMask & (1u << chan))
        {
            continue;
        }

        src[chan] = BITCAST(VIMMED1((int)info.defaults[chan]), mSimdFP32Ty);
    }
}
212
213
/// @brief Overwrites channels backed by an unused ('X') component with the format defaults.
///
/// For every component whose type is SWR_TYPE_UNUSED, the channel it is
/// swizzled to receives that channel's default, bitcast from an integer
/// vector to the SIMD float type.
///
/// @param format - format whose component types, swizzle and defaults are used
/// @param src    - color to patch in place
void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);

    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        if (info.type[comp] != SWR_TYPE_UNUSED)
        {
            continue;
        }

        const uint32_t chan = info.swizzle[comp];
        src[chan]           = BITCAST(VIMMED1((int)info.defaults[chan]), mSimdFP32Ty);
    }
}
226
227
/// @brief Snaps low precision UNORM channels to values the format can represent.
///
/// Applies to components whose bit width is at most QUANTIZE_THRESHOLD and
/// whose type is not SWR_TYPE_UNUSED. The value is scaled to the integer range
/// of the component, biased by 0.5, rounded toward zero and scaled back.
/// Any type other than UNORM is reported through SWR_INVALID and left as is.
///
/// @param format - format describing bit widths, types and swizzle
/// @param src    - color to quantize in place
void Quantize(SWR_FORMAT format, Value* src[4])
{
    const SWR_FORMAT_INFO& info = GetFormatInfo(format);

    for (uint32_t comp = 0; comp < info.numComps; ++comp)
    {
        const bool lowPrecision = (info.bpc[comp] <= QUANTIZE_THRESHOLD);
        if (!lowPrecision || info.type[comp] == SWR_TYPE_UNUSED)
        {
            continue;
        }

        if (info.type[comp] != SWR_TYPE_UNORM)
        {
            SWR_INVALID("Unsupported format type: %d", info.type[comp]);
            continue;
        }

        const uint32_t chan     = info.swizzle[comp];
        const float    maxLevel = (float)((1 << info.bpc[comp]) - 1);

        Value* scaled  = FMUL(src[chan], VIMMED1(maxLevel));
        Value* biased  = FADD(scaled, VIMMED1(0.5f));
        Value* snapped = VROUND(biased, C(_MM_FROUND_TO_ZERO));
        src[chan]      = FMUL(snapped, VIMMED1(1.0f / maxLevel));
    }
}
249
250
template <bool Color, bool Alpha>
251
void BlendFunc(SWR_BLEND_OP blendOp,
252
Value* src[4],
253
Value* srcFactor[4],
254
Value* dst[4],
255
Value* dstFactor[4],
256
Value* result[4])
257
{
258
Value* out[4];
259
Value* srcBlend[4];
260
Value* dstBlend[4];
261
for (uint32_t i = 0; i < 4; ++i)
262
{
263
srcBlend[i] = FMUL(src[i], srcFactor[i]);
264
dstBlend[i] = FMUL(dst[i], dstFactor[i]);
265
}
266
267
switch (blendOp)
268
{
269
case BLENDOP_ADD:
270
out[0] = FADD(srcBlend[0], dstBlend[0]);
271
out[1] = FADD(srcBlend[1], dstBlend[1]);
272
out[2] = FADD(srcBlend[2], dstBlend[2]);
273
out[3] = FADD(srcBlend[3], dstBlend[3]);
274
break;
275
276
case BLENDOP_SUBTRACT:
277
out[0] = FSUB(srcBlend[0], dstBlend[0]);
278
out[1] = FSUB(srcBlend[1], dstBlend[1]);
279
out[2] = FSUB(srcBlend[2], dstBlend[2]);
280
out[3] = FSUB(srcBlend[3], dstBlend[3]);
281
break;
282
283
case BLENDOP_REVSUBTRACT:
284
out[0] = FSUB(dstBlend[0], srcBlend[0]);
285
out[1] = FSUB(dstBlend[1], srcBlend[1]);
286
out[2] = FSUB(dstBlend[2], srcBlend[2]);
287
out[3] = FSUB(dstBlend[3], srcBlend[3]);
288
break;
289
290
case BLENDOP_MIN:
291
out[0] = VMINPS(src[0], dst[0]);
292
out[1] = VMINPS(src[1], dst[1]);
293
out[2] = VMINPS(src[2], dst[2]);
294
out[3] = VMINPS(src[3], dst[3]);
295
break;
296
297
case BLENDOP_MAX:
298
out[0] = VMAXPS(src[0], dst[0]);
299
out[1] = VMAXPS(src[1], dst[1]);
300
out[2] = VMAXPS(src[2], dst[2]);
301
out[3] = VMAXPS(src[3], dst[3]);
302
break;
303
304
default:
305
SWR_INVALID("Unsupported blend operation: %d", blendOp);
306
out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
307
break;
308
}
309
310
if (Color)
311
{
312
result[0] = out[0];
313
result[1] = out[1];
314
result[2] = out[2];
315
}
316
317
if (Alpha)
318
{
319
result[3] = out[3];
320
}
321
}
322
323
/// @brief Emits a bitwise logic op between PS output and render target contents.
///
/// The same operation is applied to each of the four channels. The operations
/// are integer bitwise instructions, so src and dst are expected to already be
/// integer SIMD values. Every path, including the fallback for an unsupported
/// op, writes integer values to result.
///
/// @param logicOp - logic operation to emit
/// @param src     - PS output (s)
/// @param dst     - render target contents (d)
/// @param result  - receives the four result channels
void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
{
    // bitwise complement
    // todo: use avx andnot instr for the ~x & y forms when the intrinsic is available
    auto bitNot = [this](Value* v) { return XOR(v, VIMMED1(0xFFFFFFFF)); };

    // Op: (s == PS output, d = RT contents)
    switch (logicOp)
    {
    case LOGICOP_CLEAR:
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = VIMMED1(0);
        }
        break;

    case LOGICOP_NOR:
        // ~(s | d)
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = bitNot(OR(src[i], dst[i]));
        }
        break;

    case LOGICOP_AND_INVERTED:
        // ~s & d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = AND(bitNot(src[i]), dst[i]);
        }
        break;

    case LOGICOP_COPY_INVERTED:
        // ~s
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = bitNot(src[i]);
        }
        break;

    case LOGICOP_AND_REVERSE:
        // s & ~d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = AND(bitNot(dst[i]), src[i]);
        }
        break;

    case LOGICOP_INVERT:
        // ~d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = bitNot(dst[i]);
        }
        break;

    case LOGICOP_XOR:
        // s ^ d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = XOR(src[i], dst[i]);
        }
        break;

    case LOGICOP_NAND:
        // ~(s & d)
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = bitNot(AND(src[i], dst[i]));
        }
        break;

    case LOGICOP_AND:
        // s & d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = AND(src[i], dst[i]);
        }
        break;

    case LOGICOP_EQUIV:
        // ~(s ^ d)
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = bitNot(XOR(src[i], dst[i]));
        }
        break;

    case LOGICOP_NOOP:
        // d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = dst[i];
        }
        break;

    case LOGICOP_OR_INVERTED:
        // ~s | d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = OR(bitNot(src[i]), dst[i]);
        }
        break;

    case LOGICOP_COPY:
        // s
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = src[i];
        }
        break;

    case LOGICOP_OR_REVERSE:
        // s | ~d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = OR(bitNot(dst[i]), src[i]);
        }
        break;

    case LOGICOP_OR:
        // s | d
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = OR(src[i], dst[i]);
        }
        break;

    case LOGICOP_SET:
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = VIMMED1(0xFFFFFFFF);
        }
        break;

    default:
        SWR_INVALID("Unsupported logic operation: %d", logicOp);
        // Fall back to an integer zero (same as LOGICOP_CLEAR). A float zero
        // here would not match the integer results of every other path.
        for (uint32_t i = 0; i < 4; ++i)
        {
            result[i] = VIMMED1(0);
        }
        break;
    }
}
460
461
void
462
AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
463
{
464
// load uint32_t reference
465
Value* pRef = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_alphaTestReference}));
466
467
// load alpha
468
Value* pAlpha = LOAD(ppAlpha, {0, 0});
469
470
Value* pTest = nullptr;
471
if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
472
{
473
// convert float alpha to unorm8
474
Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
475
pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
476
477
// compare
478
switch (state.alphaTestFunction)
479
{
480
case ZFUNC_ALWAYS:
481
pTest = VIMMED1(true);
482
break;
483
case ZFUNC_NEVER:
484
pTest = VIMMED1(false);
485
break;
486
case ZFUNC_LT:
487
pTest = ICMP_ULT(pAlphaU8, pRef);
488
break;
489
case ZFUNC_EQ:
490
pTest = ICMP_EQ(pAlphaU8, pRef);
491
break;
492
case ZFUNC_LE:
493
pTest = ICMP_ULE(pAlphaU8, pRef);
494
break;
495
case ZFUNC_GT:
496
pTest = ICMP_UGT(pAlphaU8, pRef);
497
break;
498
case ZFUNC_NE:
499
pTest = ICMP_NE(pAlphaU8, pRef);
500
break;
501
case ZFUNC_GE:
502
pTest = ICMP_UGE(pAlphaU8, pRef);
503
break;
504
default:
505
SWR_INVALID("Invalid alpha test function");
506
break;
507
}
508
}
509
else
510
{
511
// cast ref to float
512
pRef = BITCAST(pRef, mSimdFP32Ty);
513
514
// compare
515
switch (state.alphaTestFunction)
516
{
517
case ZFUNC_ALWAYS:
518
pTest = VIMMED1(true);
519
break;
520
case ZFUNC_NEVER:
521
pTest = VIMMED1(false);
522
break;
523
case ZFUNC_LT:
524
pTest = FCMP_OLT(pAlpha, pRef);
525
break;
526
case ZFUNC_EQ:
527
pTest = FCMP_OEQ(pAlpha, pRef);
528
break;
529
case ZFUNC_LE:
530
pTest = FCMP_OLE(pAlpha, pRef);
531
break;
532
case ZFUNC_GT:
533
pTest = FCMP_OGT(pAlpha, pRef);
534
break;
535
case ZFUNC_NE:
536
pTest = FCMP_ONE(pAlpha, pRef);
537
break;
538
case ZFUNC_GE:
539
pTest = FCMP_OGE(pAlpha, pRef);
540
break;
541
default:
542
SWR_INVALID("Invalid alpha test function");
543
break;
544
}
545
}
546
547
// load current mask
548
Value* pMask = LOAD(ppMask);
549
550
// convert to int1 mask
551
pMask = MASK(pMask);
552
553
// and with alpha test result
554
pMask = AND(pMask, pTest);
555
556
// convert back to vector mask
557
pMask = VMASK(pMask);
558
559
// store new mask
560
STORE(pMask, ppMask);
561
}
562
563
/// @brief Builds and optimizes the LLVM function for one blend compile state.
///
/// The generated function takes a single SWR_BLEND_CONTEXT pointer and returns
/// void. Its name is "BLND_" followed by the CRC of the compile state.
/// Depending on the state it emits, in this order: the alpha-to-coverage
/// sample mask, the alpha test, the color blend, the logic op and the
/// sample / output mask update. It then runs a fixed list of function passes.
///
/// @param state - blend compile state to generate code for
/// @return the generated function, owned by the JitManager's current module
Function* Create(const BLEND_COMPILE_STATE& state)
{
    std::stringstream fnName("BLND_",
                             std::ios_base::in | std::ios_base::out | std::ios_base::ate);
    fnName << ComputeCRC(0, &state, sizeof(state));

    // blend function signature
    // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);

    std::vector<Type*> args{
        PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT*
    };

    FunctionType* fTy       = FunctionType::get(IRB()->getVoidTy(), args, false);
    Function*     blendFunc = Function::Create(
        fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
    blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());

    BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);

    IRB()->SetInsertPoint(entry);

    // arguments
    auto   argitr        = blendFunc->arg_begin();
    Value* pBlendContext = &*argitr++;
    pBlendContext->setName("pBlendContext");
    Value* pBlendState = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pBlendState});
    pBlendState->setName("pBlendState");
    Value* pSrc = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src});
    pSrc->setName("src");
    Value* pSrc1 = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src1});
    pSrc1->setName("src1");
    Value* pSrc0Alpha = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src0alpha});
    pSrc0Alpha->setName("src0alpha");
    Value* sampleNum = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_sampleNum});
    sampleNum->setName("sampleNum");
    Value* pDst = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pDst});
    pDst->setName("pDst");
    Value* pResult = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_result});
    pResult->setName("result");
    Value* ppoMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_oMask});
    ppoMask->setName("ppoMask");
    Value* ppMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pMask});
    ppMask->setName("pMask");

    static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
                  "Unsupported hot tile format");
    Value* dst[4];
    Value* constantColor[4];
    Value* src[4];
    Value* src1[4];
    Value* result[4];
    for (uint32_t i = 0; i < 4; ++i)
    {
        // load hot tile
        dst[i] = LOAD(pDst, {0, i});

        // load constant color
        constantColor[i] = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_constantColor, i}));

        // load src
        src[i] = LOAD(pSrc, {0, i});

        // load src1
        src1[i] = LOAD(pSrc1, {0, i});
    }

    // Starts as all ones; narrowed below by alpha-to-coverage, oMask and sample mask.
    Value* currentSampleMask = VIMMED1(-1);
    if (state.desc.alphaToCoverageEnable)
    {
        // scale clamped source alpha to the numSamples-bit range and round to nearest
        Value*   pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
        uint32_t bits        = (1 << state.desc.numSamples) - 1;
        currentSampleMask    = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
        currentSampleMask    = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty);
    }

    // alpha test
    if (state.desc.alphaTestEnable)
    {
        // Gather for archrast stats
        STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
        AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
    }
    else
    {
        // Gather for archrast stats
        STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
    }

    // color blend
    if (state.blendState.blendEnable)
    {
        // Gather for archrast stats
        STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});

        // clamp sources
        Clamp(state.format, src);
        Clamp(state.format, src1);
        Clamp(state.format, dst);
        Clamp(state.format, constantColor);

        // apply defaults to hottile contents to take into account missing components
        ApplyDefaults(state.format, dst);

        // Force defaults for unused 'X' components
        ApplyUnusedDefaults(state.format, dst);

        // Quantize low precision components
        Quantize(state.format, dst);

        // special case clamping for R11G11B10_float which has no sign bit
        if (state.format == R11G11B10_FLOAT)
        {
            dst[0] = VMAXPS(dst[0], VIMMED1(0.0f));
            dst[1] = VMAXPS(dst[1], VIMMED1(0.0f));
            dst[2] = VMAXPS(dst[2], VIMMED1(0.0f));
            dst[3] = VMAXPS(dst[3], VIMMED1(0.0f));
        }

        Value* srcFactor[4];
        Value* dstFactor[4];
        if (state.desc.independentAlphaBlendEnable)
        {
            // color and alpha use separate factors and separate blend functions
            GenerateBlendFactor<true, false>(
                state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
            GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor,
                                             constantColor,
                                             src,
                                             src1,
                                             dst,
                                             srcFactor);

            GenerateBlendFactor<true, false>(
                state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
            GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor,
                                             constantColor,
                                             src,
                                             src1,
                                             dst,
                                             dstFactor);

            BlendFunc<true, false>(
                state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
            BlendFunc<false, true>(
                state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
        }
        else
        {
            // the color factors and color blend function drive all four channels
            GenerateBlendFactor<true, true>(
                state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
            GenerateBlendFactor<true, true>(
                state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);

            BlendFunc<true, true>(
                state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
        }

        // store results out
        for (uint32_t i = 0; i < 4; ++i)
        {
            STORE(result[i], pResult, {0, i});
        }
    }
    else
    {
        // Gather for archrast stats
        STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
    }

    if (state.blendState.logicOpEnable)
    {
        const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
        Value*                 vMask[4];
        float                  scale[4];

        // when blending ran, src and dst were already clamped above
        if (!state.blendState.blendEnable)
        {
            Clamp(state.format, src);
            Clamp(state.format, dst);
        }

        // convert src and dst to integer values in the RT format's range
        for (uint32_t i = 0; i < 4; i++)
        {
            if (info.type[i] == SWR_TYPE_UNUSED)
            {
                // LogicOpFunc emits integer bitwise ops on all four channels,
                // so unused channels must be integer typed as well. Their
                // results are never stored (see the store loop below).
                src[i] = BITCAST(src[i], mSimdInt32Ty);
                dst[i] = BITCAST(dst[i], mSimdInt32Ty);
                continue;
            }

            if (info.bpc[i] >= 32)
            {
                vMask[i] = VIMMED1(0xFFFFFFFF);
                scale[i] = 0xFFFFFFFF;
            }
            else
            {
                vMask[i] = VIMMED1((1 << info.bpc[i]) - 1);
                if (info.type[i] == SWR_TYPE_SNORM)
                    scale[i] = (1 << (info.bpc[i] - 1)) - 1;
                else
                    scale[i] = (1 << info.bpc[i]) - 1;
            }

            switch (info.type[i])
            {
            default:
                SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
                break;

            case SWR_TYPE_UNKNOWN:
            case SWR_TYPE_UINT:
            case SWR_TYPE_SINT:
                src[i] = BITCAST(src[i], mSimdInt32Ty);
                dst[i] = BITCAST(dst[i], mSimdInt32Ty);
                break;
            case SWR_TYPE_SNORM:
                src[i] = FP_TO_SI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
                dst[i] = FP_TO_SI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
                break;
            case SWR_TYPE_UNORM:
                src[i] = FP_TO_UI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
                dst[i] = FP_TO_UI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
                break;
            }
        }

        LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);

        // store results out
        for (uint32_t i = 0; i < 4; ++i)
        {
            if (info.type[i] == SWR_TYPE_UNUSED)
            {
                continue;
            }

            // clear upper bits from PS output not in RT format after doing logic op
            result[i] = AND(result[i], vMask[i]);

            switch (info.type[i])
            {
            default:
                SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
                break;

            case SWR_TYPE_UNKNOWN:
            case SWR_TYPE_UINT:
            case SWR_TYPE_SINT:
                result[i] = BITCAST(result[i], mSimdFP32Ty);
                break;
            case SWR_TYPE_SNORM:
                // shift up and arithmetic-shift back to sign extend from bpc bits
                result[i] = SHL(result[i], C(32 - info.bpc[i]));
                result[i] = ASHR(result[i], C(32 - info.bpc[i]));
                result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
                break;
            case SWR_TYPE_UNORM:
                result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
                break;
            }

            STORE(result[i], pResult, {0, i});
        }
    }

    if (state.desc.oMaskEnable)
    {
        assert(!(state.desc.alphaToCoverageEnable));
        // load current mask
        Value* oMask      = LOAD(ppoMask);
        currentSampleMask = AND(oMask, currentSampleMask);
    }

    if (state.desc.sampleMaskEnable)
    {
        Value* sampleMask = LOAD(pBlendState, {0, SWR_BLEND_STATE_sampleMask});
        currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask);
    }

    if (state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
        state.desc.oMaskEnable)
    {
        // load coverage mask and mask off any lanes with no samples
        Value* pMask        = LOAD(ppMask);
        Value* sampleMasked = SHL(C(1), sampleNum);
        currentSampleMask   = AND(currentSampleMask, VBROADCAST(sampleMasked));
        // all ones in lanes where the current sample's bit is set, zero elsewhere
        currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty);
        Value* outputMask = AND(pMask, currentSampleMask);
        // store new mask
        STORE(outputMask, GEP(ppMask, C(0)));
    }

    RET_VOID();

    JitManager::DumpToFile(blendFunc, "");

    ::FunctionPassManager passes(JM()->mpCurrentModule);

    passes.add(createBreakCriticalEdgesPass());
    passes.add(createCFGSimplificationPass());
    passes.add(createEarlyCSEPass());
    passes.add(createPromoteMemoryToRegisterPass());
    passes.add(createCFGSimplificationPass());
    passes.add(createEarlyCSEPass());
    passes.add(createInstructionCombiningPass());
#if LLVM_VERSION_MAJOR <= 11
    passes.add(createConstantPropagationPass());
#endif
    passes.add(createSCCPPass());
    passes.add(createAggressiveDCEPass());

    passes.add(createLowerX86Pass(this));

    passes.run(*blendFunc);

    JitManager::DumpToFile(blendFunc, "optimized");

    return blendFunc;
}
889
};
890
891
//////////////////////////////////////////////////////////////////////////
892
/// @brief JITs from fetch shader IR
893
/// @param hJitMgr - JitManager handle
894
/// @param func - LLVM function IR
895
/// @return PFN_FETCH_FUNC - pointer to fetch code
896
PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
897
{
898
const llvm::Function* func = (const llvm::Function*)hFunc;
899
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
900
PFN_BLEND_JIT_FUNC pfnBlend;
901
pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
902
// MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
903
// add new IR to the module
904
pJitMgr->mIsModuleFinalized = true;
905
906
return pfnBlend;
907
}
908
909
//////////////////////////////////////////////////////////////////////////
910
/// @brief JIT compiles blend shader
911
/// @param hJitMgr - JitManager handle
912
/// @param state - blend state to build function from
913
extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr,
914
const BLEND_COMPILE_STATE& state)
915
{
916
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
917
918
pJitMgr->SetupNewModule();
919
920
BlendJit theJit(pJitMgr);
921
HANDLE hFunc = theJit.Create(state);
922
923
return JitBlendFunc(hJitMgr, hFunc);
924
}
925
926