// Source: GitHub repository godotengine/godot, path: blob/master/thirdparty/re-spirv/re-spirv.cpp
//
// re-spirv
//
// Copyright (c) 2024 renderbag and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file for details.
//
7
8
#include "re-spirv.h"
9
10
#include <algorithm>
11
#include <cassert>
12
#include <cstdio>
13
#include <cstring>
14
#include <unordered_map>
15
16
#define SPV_ENABLE_UTILITY_CODE
17
18
#include "spirv/unified1/spirv.h"
19
20
// Enables more extensive output on errors.
21
#define RESPV_VERBOSE_ERRORS 0
22
23
namespace respv {
24
// Common.
25
26
// Reports whether pOpCode belongs to the fixed allow-list of opcodes below.
// Returns true for every listed opcode and false for anything else.
// NOTE(review): presumably used to reject shaders that contain opcodes this library cannot process - confirm against callers.
static bool SpvIsSupported(SpvOp pOpCode) {
    switch (pOpCode) {
    // Module-level declarations and debug information.
    case SpvOpUndef:
    case SpvOpSource:
    case SpvOpName:
    case SpvOpMemberName:
    case SpvOpExtension:
    case SpvOpExtInstImport:
    case SpvOpExtInst:
    case SpvOpMemoryModel:
    case SpvOpEntryPoint:
    case SpvOpExecutionMode:
    case SpvOpCapability:
    // Type declarations.
    case SpvOpTypeVoid:
    case SpvOpTypeBool:
    case SpvOpTypeInt:
    case SpvOpTypeFloat:
    case SpvOpTypeVector:
    case SpvOpTypeMatrix:
    case SpvOpTypeImage:
    case SpvOpTypeSampler:
    case SpvOpTypeSampledImage:
    case SpvOpTypeArray:
    case SpvOpTypeRuntimeArray:
    case SpvOpTypeStruct:
    case SpvOpTypePointer:
    case SpvOpTypeFunction:
    // Constants and specialization constants.
    case SpvOpConstantTrue:
    case SpvOpConstantFalse:
    case SpvOpConstant:
    case SpvOpConstantComposite:
    case SpvOpConstantNull:
    case SpvOpSpecConstantTrue:
    case SpvOpSpecConstantFalse:
    case SpvOpSpecConstant:
    case SpvOpSpecConstantOp:
    // Functions.
    case SpvOpFunction:
    case SpvOpFunctionParameter:
    case SpvOpFunctionEnd:
    case SpvOpFunctionCall:
    // Memory access and decorations.
    case SpvOpVariable:
    case SpvOpImageTexelPointer:
    case SpvOpLoad:
    case SpvOpStore:
    case SpvOpAccessChain:
    case SpvOpDecorate:
    case SpvOpMemberDecorate:
    // Composites.
    case SpvOpVectorShuffle:
    case SpvOpCompositeConstruct:
    case SpvOpCompositeExtract:
    case SpvOpCompositeInsert:
    case SpvOpCopyObject:
    case SpvOpTranspose:
    // Images.
    case SpvOpSampledImage:
    case SpvOpImageSampleImplicitLod:
    case SpvOpImageSampleExplicitLod:
    case SpvOpImageSampleDrefImplicitLod:
    case SpvOpImageSampleDrefExplicitLod:
    case SpvOpImageSampleProjImplicitLod:
    case SpvOpImageSampleProjExplicitLod:
    case SpvOpImageSampleProjDrefImplicitLod:
    case SpvOpImageSampleProjDrefExplicitLod:
    case SpvOpImageFetch:
    case SpvOpImageGather:
    case SpvOpImageDrefGather:
    case SpvOpImageRead:
    case SpvOpImageWrite:
    case SpvOpImage:
    case SpvOpImageQueryFormat:
    case SpvOpImageQueryOrder:
    case SpvOpImageQuerySizeLod:
    case SpvOpImageQuerySize:
    case SpvOpImageQueryLod:
    case SpvOpImageQueryLevels:
    case SpvOpImageQuerySamples:
    // Conversions.
    case SpvOpConvertFToU:
    case SpvOpConvertFToS:
    case SpvOpConvertSToF:
    case SpvOpConvertUToF:
    case SpvOpUConvert:
    case SpvOpSConvert:
    case SpvOpFConvert:
    case SpvOpBitcast:
    // Arithmetic.
    case SpvOpSNegate:
    case SpvOpFNegate:
    case SpvOpIAdd:
    case SpvOpFAdd:
    case SpvOpISub:
    case SpvOpFSub:
    case SpvOpIMul:
    case SpvOpFMul:
    case SpvOpUDiv:
    case SpvOpSDiv:
    case SpvOpFDiv:
    case SpvOpUMod:
    case SpvOpSRem:
    case SpvOpSMod:
    case SpvOpFRem:
    case SpvOpFMod:
    case SpvOpVectorTimesScalar:
    case SpvOpMatrixTimesScalar:
    case SpvOpVectorTimesMatrix:
    case SpvOpMatrixTimesVector:
    case SpvOpMatrixTimesMatrix:
    case SpvOpOuterProduct:
    case SpvOpDot:
    case SpvOpIAddCarry:
    case SpvOpISubBorrow:
    case SpvOpUMulExtended:
    case SpvOpSMulExtended:
    // Relational and logical.
    case SpvOpAny:
    case SpvOpAll:
    case SpvOpIsNan:
    case SpvOpIsInf:
    case SpvOpIsFinite:
    case SpvOpIsNormal:
    case SpvOpLogicalEqual:
    case SpvOpLogicalNotEqual:
    case SpvOpLogicalOr:
    case SpvOpLogicalAnd:
    case SpvOpLogicalNot:
    case SpvOpSelect:
    case SpvOpIEqual:
    case SpvOpINotEqual:
    case SpvOpUGreaterThan:
    case SpvOpSGreaterThan:
    case SpvOpUGreaterThanEqual:
    case SpvOpSGreaterThanEqual:
    case SpvOpULessThan:
    case SpvOpSLessThan:
    case SpvOpULessThanEqual:
    case SpvOpSLessThanEqual:
    case SpvOpFOrdEqual:
    case SpvOpFUnordEqual:
    case SpvOpFOrdNotEqual:
    case SpvOpFUnordNotEqual:
    case SpvOpFOrdLessThan:
    case SpvOpFUnordLessThan:
    case SpvOpFOrdGreaterThan:
    case SpvOpFUnordGreaterThan:
    case SpvOpFOrdLessThanEqual:
    case SpvOpFUnordLessThanEqual:
    case SpvOpFOrdGreaterThanEqual:
    case SpvOpFUnordGreaterThanEqual:
    // Bit manipulation.
    case SpvOpShiftRightLogical:
    case SpvOpShiftRightArithmetic:
    case SpvOpShiftLeftLogical:
    case SpvOpBitwiseOr:
    case SpvOpBitwiseXor:
    case SpvOpBitwiseAnd:
    case SpvOpNot:
    case SpvOpBitFieldInsert:
    case SpvOpBitFieldSExtract:
    case SpvOpBitFieldUExtract:
    case SpvOpBitReverse:
    case SpvOpBitCount:
    // Derivatives.
    case SpvOpDPdx:
    case SpvOpDPdy:
    case SpvOpFwidth:
    case SpvOpDPdxFine:
    case SpvOpDPdyFine:
    case SpvOpFwidthFine:
    case SpvOpDPdxCoarse:
    case SpvOpDPdyCoarse:
    case SpvOpFwidthCoarse:
    // Barriers and atomics.
    case SpvOpControlBarrier:
    case SpvOpMemoryBarrier:
    case SpvOpAtomicLoad:
    case SpvOpAtomicStore:
    case SpvOpAtomicExchange:
    case SpvOpAtomicCompareExchange:
    case SpvOpAtomicCompareExchangeWeak:
    case SpvOpAtomicIIncrement:
    case SpvOpAtomicIDecrement:
    case SpvOpAtomicIAdd:
    case SpvOpAtomicISub:
    case SpvOpAtomicSMin:
    case SpvOpAtomicUMin:
    case SpvOpAtomicSMax:
    case SpvOpAtomicUMax:
    case SpvOpAtomicAnd:
    case SpvOpAtomicOr:
    case SpvOpAtomicXor:
    // Control flow.
    case SpvOpPhi:
    case SpvOpLoopMerge:
    case SpvOpSelectionMerge:
    case SpvOpLabel:
    case SpvOpBranch:
    case SpvOpBranchConditional:
    case SpvOpSwitch:
    case SpvOpKill:
    case SpvOpReturn:
    case SpvOpReturnValue:
    case SpvOpUnreachable:
    // Non-uniform group operations.
    case SpvOpGroupNonUniformElect:
    case SpvOpGroupNonUniformAll:
    case SpvOpGroupNonUniformAny:
    case SpvOpGroupNonUniformAllEqual:
    case SpvOpGroupNonUniformBroadcast:
    case SpvOpGroupNonUniformBroadcastFirst:
    case SpvOpGroupNonUniformBallot:
    case SpvOpGroupNonUniformInverseBallot:
    case SpvOpGroupNonUniformBallotBitExtract:
    case SpvOpGroupNonUniformBallotBitCount:
    case SpvOpGroupNonUniformBallotFindLSB:
    case SpvOpGroupNonUniformBallotFindMSB:
    case SpvOpGroupNonUniformShuffle:
    case SpvOpGroupNonUniformShuffleXor:
    case SpvOpGroupNonUniformShuffleUp:
    case SpvOpGroupNonUniformShuffleDown:
    case SpvOpGroupNonUniformIAdd:
    case SpvOpGroupNonUniformFAdd:
    case SpvOpGroupNonUniformIMul:
    case SpvOpGroupNonUniformFMul:
    case SpvOpGroupNonUniformSMin:
    case SpvOpGroupNonUniformUMin:
    case SpvOpGroupNonUniformFMin:
    case SpvOpGroupNonUniformSMax:
    case SpvOpGroupNonUniformUMax:
    case SpvOpGroupNonUniformFMax:
    case SpvOpGroupNonUniformBitwiseAnd:
    case SpvOpGroupNonUniformBitwiseOr:
    case SpvOpGroupNonUniformBitwiseXor:
    case SpvOpGroupNonUniformLogicalAnd:
    case SpvOpGroupNonUniformLogicalOr:
    case SpvOpGroupNonUniformLogicalXor:
    case SpvOpGroupNonUniformQuadBroadcast:
    case SpvOpGroupNonUniformQuadSwap:
    case SpvOpCopyLogical:
        return true;
    default:
        return false;
    }
}
260
261
// Reports whether pOpCode is one of the three debug opcodes listed below
// (OpSource, OpName, OpMemberName). Returns false for every other opcode.
// NOTE(review): presumably these instructions are dropped while parsing - confirm against callers.
static bool SpvIsIgnored(SpvOp pOpCode) {
    switch (pOpCode) {
    case SpvOpSource:
    case SpvOpName:
    case SpvOpMemberName:
        return true;
    default:
        return false;
    }
}
271
272
// Looks up the operand layout table entry for pOpCode.
//
// On a known opcode the five output parameters are filled in and true is returned.
// On an unknown opcode false is returned and the outputs are left untouched.
//
// Outputs:
//   rOperandWordStart      - first operand word (presumably relative to the start of the instruction - confirm).
//   rOperandWordCount      - number of operand words; UINT32_MAX is used for variable-length lists
//                            (presumably "until the end of the instruction" - confirm against callers).
//   rOperandWordStride     - step between consecutive operand words.
//   rOperandWordSkip       - relative operand index to skip (compared by checkOperandWordSkip);
//                            UINT32_MAX is used where no entry is skipped.
//   rOperandWordSkipString - when true, the skipped entry is a null-terminated string rather than a single word.
//
// pIncludePhi selects whether OpPhi reports its operands or reports none (all outputs zero).
static bool SpvHasOperands(SpvOp pOpCode, uint32_t &rOperandWordStart, uint32_t &rOperandWordCount, uint32_t &rOperandWordStride, uint32_t &rOperandWordSkip, bool &rOperandWordSkipString, bool pIncludePhi) {
    // Stores one table row into the output parameters and reports success.
    const auto describe = [&](uint32_t pStart, uint32_t pCount, uint32_t pStride, uint32_t pSkip, bool pSkipString) {
        rOperandWordStart = pStart;
        rOperandWordCount = pCount;
        rOperandWordStride = pStride;
        rOperandWordSkip = pSkip;
        rOperandWordSkipString = pSkipString;
        return true;
    };

    switch (pOpCode) {
    case SpvOpExecutionMode:
    case SpvOpBranchConditional:
    case SpvOpSwitch:
    case SpvOpReturnValue:
    case SpvOpDecorate:
    case SpvOpMemberDecorate:
        return describe(1, 1, 1, UINT32_MAX, false);
    case SpvOpStore:
    case SpvOpMemoryBarrier:
        return describe(1, 2, 1, UINT32_MAX, false);
    case SpvOpControlBarrier:
        return describe(1, 3, 1, UINT32_MAX, false);
    case SpvOpTypeVector:
    case SpvOpTypeMatrix:
    case SpvOpTypeImage:
    case SpvOpTypeSampledImage:
    case SpvOpTypeRuntimeArray:
        return describe(2, 1, 1, UINT32_MAX, false);
    case SpvOpTypeArray:
        return describe(2, 2, 1, UINT32_MAX, false);
    case SpvOpTypeStruct:
    case SpvOpTypeFunction:
        return describe(2, UINT32_MAX, 1, UINT32_MAX, false);
    case SpvOpEntryPoint:
        // The entry at relative index 1 is a string and is skipped as such.
        return describe(2, UINT32_MAX, 1, 1, true);
    case SpvOpTypePointer:
    case SpvOpLoad:
    case SpvOpCompositeExtract:
    case SpvOpCopyObject:
    case SpvOpTranspose:
    case SpvOpImage:
    case SpvOpImageQueryFormat:
    case SpvOpImageQueryOrder:
    case SpvOpImageQuerySize:
    case SpvOpImageQueryLevels:
    case SpvOpImageQuerySamples:
    case SpvOpConvertFToU:
    case SpvOpConvertFToS:
    case SpvOpConvertSToF:
    case SpvOpConvertUToF:
    case SpvOpUConvert:
    case SpvOpSConvert:
    case SpvOpFConvert:
    case SpvOpBitcast:
    case SpvOpSNegate:
    case SpvOpFNegate:
    case SpvOpAny:
    case SpvOpAll:
    case SpvOpIsNan:
    case SpvOpIsInf:
    case SpvOpIsFinite:
    case SpvOpIsNormal:
    case SpvOpLogicalNot:
    case SpvOpNot:
    case SpvOpBitReverse:
    case SpvOpBitCount:
    case SpvOpDPdx:
    case SpvOpDPdy:
    case SpvOpFwidth:
    case SpvOpDPdxFine:
    case SpvOpDPdyFine:
    case SpvOpFwidthFine:
    case SpvOpDPdxCoarse:
    case SpvOpDPdyCoarse:
    case SpvOpFwidthCoarse:
    case SpvOpGroupNonUniformElect:
    case SpvOpCopyLogical:
        return describe(3, 1, 1, UINT32_MAX, false);
    case SpvOpVectorShuffle:
    case SpvOpCompositeInsert:
    case SpvOpSampledImage:
    case SpvOpImageQuerySizeLod:
    case SpvOpImageQueryLod:
    case SpvOpIAdd:
    case SpvOpFAdd:
    case SpvOpISub:
    case SpvOpFSub:
    case SpvOpIMul:
    case SpvOpFMul:
    case SpvOpUDiv:
    case SpvOpSDiv:
    case SpvOpFDiv:
    case SpvOpUMod:
    case SpvOpSRem:
    case SpvOpSMod:
    case SpvOpFRem:
    case SpvOpFMod:
    case SpvOpVectorTimesScalar:
    case SpvOpMatrixTimesScalar:
    case SpvOpVectorTimesMatrix:
    case SpvOpMatrixTimesVector:
    case SpvOpMatrixTimesMatrix:
    case SpvOpOuterProduct:
    case SpvOpDot:
    case SpvOpIAddCarry:
    case SpvOpISubBorrow:
    case SpvOpUMulExtended:
    case SpvOpSMulExtended:
    case SpvOpLogicalEqual:
    case SpvOpLogicalNotEqual:
    case SpvOpLogicalOr:
    case SpvOpLogicalAnd:
    case SpvOpIEqual:
    case SpvOpINotEqual:
    case SpvOpUGreaterThan:
    case SpvOpSGreaterThan:
    case SpvOpUGreaterThanEqual:
    case SpvOpSGreaterThanEqual:
    case SpvOpULessThan:
    case SpvOpSLessThan:
    case SpvOpULessThanEqual:
    case SpvOpSLessThanEqual:
    case SpvOpFOrdEqual:
    case SpvOpFUnordEqual:
    case SpvOpFOrdNotEqual:
    case SpvOpFUnordNotEqual:
    case SpvOpFOrdLessThan:
    case SpvOpFUnordLessThan:
    case SpvOpFOrdGreaterThan:
    case SpvOpFUnordGreaterThan:
    case SpvOpFOrdLessThanEqual:
    case SpvOpFUnordLessThanEqual:
    case SpvOpFOrdGreaterThanEqual:
    case SpvOpFUnordGreaterThanEqual:
    case SpvOpShiftRightLogical:
    case SpvOpShiftRightArithmetic:
    case SpvOpShiftLeftLogical:
    case SpvOpBitwiseOr:
    case SpvOpBitwiseAnd:
    case SpvOpBitwiseXor:
    case SpvOpGroupNonUniformAll:
    case SpvOpGroupNonUniformAny:
    case SpvOpGroupNonUniformAllEqual:
    case SpvOpGroupNonUniformBroadcastFirst:
    case SpvOpGroupNonUniformBallot:
    case SpvOpGroupNonUniformInverseBallot:
    case SpvOpGroupNonUniformBallotFindLSB:
    case SpvOpGroupNonUniformBallotFindMSB:
        return describe(3, 2, 1, UINT32_MAX, false);
    case SpvOpImageTexelPointer:
    case SpvOpSelect:
    case SpvOpBitFieldSExtract:
    case SpvOpBitFieldUExtract:
    case SpvOpAtomicLoad:
    case SpvOpAtomicIIncrement:
    case SpvOpAtomicIDecrement:
    case SpvOpGroupNonUniformBroadcast:
    case SpvOpGroupNonUniformBallotBitExtract:
    case SpvOpGroupNonUniformShuffle:
    case SpvOpGroupNonUniformShuffleXor:
    case SpvOpGroupNonUniformShuffleUp:
    case SpvOpGroupNonUniformShuffleDown:
    case SpvOpGroupNonUniformQuadBroadcast:
    case SpvOpGroupNonUniformQuadSwap:
        return describe(3, 3, 1, UINT32_MAX, false);
    case SpvOpGroupNonUniformBallotBitCount:
        return describe(3, 3, 1, 1, false);
    case SpvOpAtomicStore:
        return describe(1, 4, 1, UINT32_MAX, false);
    case SpvOpBitFieldInsert:
    case SpvOpAtomicExchange:
    case SpvOpAtomicIAdd:
    case SpvOpAtomicISub:
    case SpvOpAtomicSMin:
    case SpvOpAtomicUMin:
    case SpvOpAtomicSMax:
    case SpvOpAtomicUMax:
    case SpvOpAtomicAnd:
    case SpvOpAtomicOr:
    case SpvOpAtomicXor:
        return describe(3, 4, 1, UINT32_MAX, false);
    case SpvOpAtomicCompareExchange:
    case SpvOpAtomicCompareExchangeWeak:
        return describe(3, 6, 1, UINT32_MAX, false);
    case SpvOpConstantComposite:
    case SpvOpFunctionCall:
    case SpvOpAccessChain:
    case SpvOpCompositeConstruct:
        return describe(3, UINT32_MAX, 1, UINT32_MAX, false);
    case SpvOpSpecConstantOp:
        return describe(3, UINT32_MAX, 1, 0, false);
    case SpvOpExtInst:
    case SpvOpGroupNonUniformIAdd:
    case SpvOpGroupNonUniformFAdd:
    case SpvOpGroupNonUniformIMul:
    case SpvOpGroupNonUniformFMul:
    case SpvOpGroupNonUniformSMin:
    case SpvOpGroupNonUniformUMin:
    case SpvOpGroupNonUniformFMin:
    case SpvOpGroupNonUniformSMax:
    case SpvOpGroupNonUniformUMax:
    case SpvOpGroupNonUniformFMax:
    case SpvOpGroupNonUniformBitwiseAnd:
    case SpvOpGroupNonUniformBitwiseOr:
    case SpvOpGroupNonUniformBitwiseXor:
    case SpvOpGroupNonUniformLogicalAnd:
    case SpvOpGroupNonUniformLogicalOr:
    case SpvOpGroupNonUniformLogicalXor:
        return describe(3, UINT32_MAX, 1, 1, false);
    case SpvOpImageWrite:
        return describe(1, UINT32_MAX, 1, 3, false);
    case SpvOpImageSampleImplicitLod:
    case SpvOpImageSampleExplicitLod:
    case SpvOpImageSampleProjImplicitLod:
    case SpvOpImageSampleProjExplicitLod:
    case SpvOpImageFetch:
    case SpvOpImageRead:
        return describe(3, UINT32_MAX, 1, 2, false);
    case SpvOpImageSampleDrefImplicitLod:
    case SpvOpImageSampleDrefExplicitLod:
    case SpvOpImageSampleProjDrefImplicitLod:
    case SpvOpImageSampleProjDrefExplicitLod:
    case SpvOpImageGather:
    case SpvOpImageDrefGather:
        return describe(3, UINT32_MAX, 1, 3, false);
    case SpvOpPhi:
        // With pIncludePhi the operands are visited with a stride of two words;
        // without it the instruction is still recognized but reports no operands.
        if (pIncludePhi) {
            return describe(3, UINT32_MAX, 2, UINT32_MAX, false);
        }

        return describe(0, 0, 0, 0, false);
    case SpvOpFunction:
    case SpvOpVariable:
        return describe(4, 1, 1, UINT32_MAX, false);
    // Recognized opcodes that report no operands.
    case SpvOpLabel:
    case SpvOpBranch:
    case SpvOpConstantTrue:
    case SpvOpConstantFalse:
    case SpvOpConstant:
    case SpvOpConstantSampler:
    case SpvOpConstantNull:
    case SpvOpSpecConstantTrue:
    case SpvOpSpecConstantFalse:
    case SpvOpSpecConstant:
    case SpvOpCapability:
    case SpvOpExtInstImport:
    case SpvOpMemoryModel:
    case SpvOpTypeVoid:
    case SpvOpTypeBool:
    case SpvOpTypeInt:
    case SpvOpTypeFloat:
    case SpvOpTypeSampler:
    case SpvOpLoopMerge:
    case SpvOpSelectionMerge:
    case SpvOpKill:
    case SpvOpReturn:
    case SpvOpUnreachable:
    case SpvOpFunctionParameter:
    case SpvOpFunctionEnd:
    case SpvOpExtension:
    case SpvOpUndef:
        return describe(0, 0, 0, 0, false);
    default:
        return false;
    }
}
651
652
// Looks up where pOpCode stores label references.
//
// Returns true and fills the outputs when the opcode carries labels; returns false
// (leaving the outputs untouched) for every other opcode, and for OpPhi when
// pIncludePhi is false.
//
// Outputs:
//   rLabelWordStart  - first label word (presumably relative to the start of the instruction - confirm).
//   rLabelWordCount  - number of label words; UINT32_MAX is used for variable-length lists
//                      (presumably "until the end of the instruction" - confirm against callers).
//   rLabelWordStride - step between consecutive label words.
static bool SpvHasLabels(SpvOp pOpCode, uint32_t &rLabelWordStart, uint32_t &rLabelWordCount, uint32_t &rLabelWordStride, bool pIncludePhi) {
    // Stores one table row into the output parameters and reports success.
    const auto describe = [&](uint32_t pStart, uint32_t pCount, uint32_t pStride) {
        rLabelWordStart = pStart;
        rLabelWordCount = pCount;
        rLabelWordStride = pStride;
        return true;
    };

    switch (pOpCode) {
    case SpvOpSelectionMerge:
    case SpvOpBranch:
        return describe(1, 1, 1);
    case SpvOpLoopMerge:
        return describe(1, 2, 1);
    case SpvOpBranchConditional:
        return describe(2, 2, 1);
    case SpvOpSwitch:
        // Labels are read at every second word starting from word 2.
        return describe(2, UINT32_MAX, 2);
    case SpvOpPhi:
        // Labels are read at every second word starting from word 4, but only on request.
        return pIncludePhi ? describe(4, UINT32_MAX, 2) : false;
    default:
        return false;
    }
}
689
690
// Used to indicate which operations have side effects and can't be discarded if their result is not used.
691
static bool SpvHasSideEffects(SpvOp pOpCode) {
692
switch (pOpCode) {
693
case SpvOpFunctionCall:
694
case SpvOpAtomicExchange:
695
case SpvOpAtomicCompareExchange:
696
case SpvOpAtomicCompareExchangeWeak:
697
case SpvOpAtomicIIncrement:
698
case SpvOpAtomicIDecrement:
699
case SpvOpAtomicIAdd:
700
case SpvOpAtomicISub:
701
case SpvOpAtomicSMin:
702
case SpvOpAtomicUMin:
703
case SpvOpAtomicSMax:
704
case SpvOpAtomicUMax:
705
case SpvOpAtomicAnd:
706
case SpvOpAtomicOr:
707
case SpvOpAtomicXor:
708
case SpvOpAtomicFlagTestAndSet:
709
case SpvOpAtomicFlagClear:
710
return true;
711
default:
712
return false;
713
}
714
}
715
716
// Reports whether pOpCode is one of the branch, return, kill or unreachable opcodes listed below.
// NOTE(review): presumably these mark the last instruction of a block - confirm against callers.
static bool SpvOpIsTerminator(SpvOp pOpCode) {
    switch (pOpCode) {
    case SpvOpBranch:
    case SpvOpBranchConditional:
    case SpvOpSwitch:
    case SpvOpReturn:
    case SpvOpReturnValue:
    case SpvOpKill:
    case SpvOpUnreachable:
        return true;
    default:
        return false;
    }
}
730
731
// Advances rOperandWordIndex past a skipped operand entry when the current operand is the one to skip.
//
//   pWordIndex             - base word index; added to rOperandWordIndex to locate the string.
//   pSpirvWords            - word buffer the string is read from.
//   pRelativeWordIndex     - index of the operand currently being visited.
//   pOperandWordSkip       - index of the operand that must be skipped.
//   pOperandWordSkipString - true when the skipped operand is a null-terminated string.
//   rOperandWordIndex      - advanced by one word, or by the full length of the string in words.
//
// Returns true when the operand was skipped and false (leaving rOperandWordIndex untouched) otherwise.
static bool checkOperandWordSkip(uint32_t pWordIndex, const uint32_t *pSpirvWords, uint32_t pRelativeWordIndex, uint32_t pOperandWordSkip, bool pOperandWordSkipString, uint32_t &rOperandWordIndex) {
    if (pRelativeWordIndex != pOperandWordSkip) {
        return false;
    }

    if (!pOperandWordSkipString) {
        rOperandWordIndex++;
        return true;
    }

    // NOTE(review): strlen() scans until the first zero byte and the end of the buffer is not known here,
    // so a string without a terminator would be read past the end of the data.
    const char *operandString = reinterpret_cast<const char *>(&pSpirvWords[pWordIndex + rOperandWordIndex]);

    // Adding a whole word before dividing accounts for the terminator: a string whose length is an exact
    // multiple of the word size takes one extra word.
    const size_t stringLengthInWords = (strlen(operandString) + sizeof(uint32_t)) / sizeof(uint32_t);
    rOperandWordIndex += static_cast<uint32_t>(stringLengthInWords);
    return true;
}
748
749
static uint32_t addToList(uint32_t pInstructionIndex, uint32_t pListIndex, std::vector<ListNode> &rListNodes) {
750
rListNodes.emplace_back(pInstructionIndex, pListIndex);
751
return uint32_t(rListNodes.size() - 1);
752
}
753
754
// Shader
755
756
// Creates a shader without parsing any data.
Shader::Shader() {
    // Empty.
}
759
760
// Creates a shader and immediately forwards the arguments to parse().
// NOTE(review): any result reported by parse() is not propagated from here - confirm how callers detect failure.
Shader::Shader(const void *pData, size_t pSize, bool pInlineFunctions) {
    parse(pData, pSize, pInlineFunctions);
}
763
764
void Shader::clear() {
765
extSpirvWords = nullptr;
766
extSpirvWordCount = 0;
767
inlinedSpirvWords.clear();
768
instructions.clear();
769
instructionAdjacentListIndices.clear();
770
instructionInDegrees.clear();
771
instructionOutDegrees.clear();
772
instructionOrder.clear();
773
blocks.clear();
774
blockPreOrderIndices.clear();
775
blockPostOrderIndices.clear();
776
functions.clear();
777
variableOrder.clear();
778
results.clear();
779
specializations.clear();
780
decorations.clear();
781
phis.clear();
782
loopHeaders.clear();
783
listNodes.clear();
784
defaultSwitchOpConstantInt = UINT32_MAX;
785
}
786
787
// Index of the first word parsed as an instruction. Data with fewer words than this is rejected by checkData().
constexpr uint32_t SpvStartWordIndex = 5;
788
789
bool Shader::checkData(const void *pData, size_t pSize) {
790
const uint32_t *words = reinterpret_cast<const uint32_t *>(pData);
791
const size_t wordCount = pSize / sizeof(uint32_t);
792
if (wordCount < SpvStartWordIndex) {
793
fprintf(stderr, "Not enough words in SPIR-V.\n");
794
return false;
795
}
796
797
if (words[0] != SpvMagicNumber) {
798
fprintf(stderr, "Invalid SPIR-V Magic Number on header.\n");
799
return false;
800
}
801
802
if (words[1] > SpvVersion) {
803
fprintf(stderr, "SPIR-V Version is too new for the library. Max version for the library is 0x%X.\n", SpvVersion);
804
return false;
805
}
806
807
return true;
808
}
809
810
bool Shader::inlineData(const void *pData, size_t pSize) {
811
assert(pData != nullptr);
812
assert(pSize > 0);
813
814
struct CallItem {
815
uint32_t wordIndex = 0;
816
uint32_t functionId = UINT32_MAX;
817
uint32_t blockId = UINT32_MAX;
818
uint32_t startBlockId = UINT32_MAX;
819
uint32_t loopBlockId = UINT32_MAX;
820
uint32_t continueBlockId = UINT32_MAX;
821
uint32_t returnBlockId = UINT32_MAX;
822
uint32_t resultType = UINT32_MAX;
823
uint32_t resultId = UINT32_MAX;
824
uint32_t parameterIndex = 0;
825
uint32_t remapsPendingCount = 0;
826
uint32_t returnParametersCount = 0;
827
uint32_t sameBlockOperationsCount = 0;
828
bool startBlockIdAssigned = false;
829
bool functionInlined = false;
830
831
CallItem(uint32_t wordIndex, uint32_t functionId = UINT32_MAX, bool functionInlined = false, uint32_t startBlockId = UINT32_MAX, uint32_t loopBlockId = UINT32_MAX, uint32_t continueBlockId = UINT32_MAX, uint32_t returnBlockId = UINT32_MAX, uint32_t resultType = UINT32_MAX, uint32_t resultId = UINT32_MAX)
832
: wordIndex(wordIndex), functionId(functionId), functionInlined(functionInlined), startBlockId(startBlockId), loopBlockId(loopBlockId), continueBlockId(continueBlockId), returnBlockId(returnBlockId), resultType(resultType), resultId(resultId)
833
{
834
// Regular constructor.
835
}
836
};
837
838
struct FunctionDefinition {
839
uint32_t wordIndex = 0;
840
uint32_t wordCount = 0;
841
uint32_t resultId = UINT32_MAX;
842
uint32_t functionWordCount = 0;
843
uint32_t codeWordCount = 0;
844
uint32_t variableWordCount = 0;
845
uint32_t decorationWordCount = 0;
846
uint32_t inlineWordCount = 0;
847
uint32_t returnValueCount = 0;
848
uint32_t callIndex = 0;
849
uint32_t callCount = 0;
850
uint32_t parameterIndex = 0;
851
uint32_t parameterCount = 0;
852
uint32_t inlinedVariableWordCount = 0;
853
bool canInline = true;
854
855
FunctionDefinition() {
856
// Default empty constructor.
857
}
858
859
FunctionDefinition(uint32_t resultId) : resultId(resultId) {
860
// Constructor for sorting.
861
}
862
863
bool operator<(const FunctionDefinition &other) const {
864
return resultId < other.resultId;
865
}
866
};
867
868
struct FunctionParameter {
869
uint32_t resultId = 0;
870
871
FunctionParameter(uint32_t resultId) : resultId(resultId) {
872
// Regular constructor.
873
}
874
};
875
876
struct FunctionCall {
877
uint32_t wordIndex = 0;
878
uint32_t functionId = 0;
879
uint32_t sameBlockWordCount = 0;
880
881
FunctionCall(uint32_t wordIndex, uint32_t functionId, uint32_t sameBlockWordCount) : wordIndex(wordIndex), functionId(functionId), sameBlockWordCount(sameBlockWordCount) {
882
// Regular constructor.
883
}
884
};
885
886
struct FunctionResult {
887
uint32_t wordIndex = UINT32_MAX;
888
uint32_t decorationIndex = UINT32_MAX;
889
};
890
891
typedef std::vector<FunctionDefinition>::iterator FunctionDefinitionIterator;
892
893
struct FunctionItem {
894
FunctionDefinitionIterator function = {};
895
FunctionDefinitionIterator rootFunction = {};
896
uint32_t callIndex = 0;
897
898
FunctionItem(FunctionDefinitionIterator function, FunctionDefinitionIterator rootFunction, uint32_t callIndex) : function(function), rootFunction(rootFunction), callIndex(callIndex) {
899
// Regular constructor.
900
}
901
};
902
903
struct ResultDecoration {
904
uint32_t wordIndex = 0;
905
uint32_t nextDecorationIndex = 0;
906
907
ResultDecoration(uint32_t wordIndex, uint32_t nextDecorationIndex) : wordIndex(wordIndex), nextDecorationIndex(nextDecorationIndex) {
908
// Regular constructor.
909
}
910
};
911
912
thread_local std::vector<FunctionResult> functionResultMap;
913
thread_local std::vector<ResultDecoration> resultDecorations;
914
thread_local std::vector<uint32_t> loopMergeIdStack;
915
thread_local std::vector<FunctionDefinition> functionDefinitions;
916
thread_local std::vector<FunctionParameter> functionParameters;
917
thread_local std::vector<FunctionCall> functionCalls;
918
thread_local std::vector<FunctionItem> functionStack;
919
thread_local std::vector<CallItem> callStack;
920
thread_local std::vector<uint32_t> shaderResultMap;
921
thread_local std::vector<uint32_t> storeMap;
922
thread_local std::vector<uint32_t> storeMapChanges;
923
thread_local std::vector<uint32_t> loadMap;
924
thread_local std::vector<uint32_t> loadMapChanges;
925
thread_local std::vector<uint32_t> phiMap;
926
thread_local std::vector<uint32_t> opPhis;
927
thread_local std::vector<uint32_t> remapsPending;
928
thread_local std::vector<uint32_t> returnParameters;
929
thread_local std::vector<uint32_t> sameBlockOperations;
930
functionResultMap.clear();
931
resultDecorations.clear();
932
loopMergeIdStack.clear();
933
functionDefinitions.clear();
934
functionParameters.clear();
935
functionCalls.clear();
936
callStack.clear();
937
shaderResultMap.clear();
938
storeMap.clear();
939
storeMapChanges.clear();
940
loadMap.clear();
941
loadMapChanges.clear();
942
phiMap.clear();
943
opPhis.clear();
944
remapsPending.clear();
945
returnParameters.clear();
946
sameBlockOperations.clear();
947
948
// Parse all instructions in the shader first.
949
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
950
const size_t dataWordCount = pSize / sizeof(uint32_t);
951
const uint32_t dataIdBound = dataWords[3];
952
functionResultMap.resize(dataIdBound);
953
954
FunctionDefinition currentFunction;
955
uint32_t parseWordIndex = SpvStartWordIndex;
956
uint32_t entryPointFunctionId = UINT32_MAX;
957
uint32_t globalWordCount = 0;
958
uint32_t sameBlockWordCount = 0;
959
while (parseWordIndex < dataWordCount) {
960
SpvOp opCode = SpvOp(dataWords[parseWordIndex] & 0xFFFFU);
961
uint32_t wordCount = (dataWords[parseWordIndex] >> 16U) & 0xFFFFU;
962
if (wordCount == 0) {
963
fprintf(stderr, "Invalid word count found at %d.\n", parseWordIndex);
964
return false;
965
}
966
967
switch (opCode) {
968
case SpvOpFunction:
969
if (currentFunction.resultId != UINT32_MAX) {
970
fprintf(stderr, "Found function start without the previous function ending.\n");
971
return false;
972
}
973
974
currentFunction.resultId = dataWords[parseWordIndex + 2];
975
currentFunction.wordIndex = parseWordIndex;
976
currentFunction.functionWordCount = wordCount;
977
break;
978
case SpvOpFunctionEnd:
979
if (currentFunction.resultId == UINT32_MAX) {
980
fprintf(stderr, "Found function end without a function start.\n");
981
return false;
982
}
983
984
currentFunction.wordCount = parseWordIndex + wordCount - currentFunction.wordIndex;
985
currentFunction.functionWordCount += wordCount;
986
functionDefinitions.emplace_back(currentFunction);
987
988
// Reset the current function to being empty again.
989
currentFunction = FunctionDefinition();
990
break;
991
case SpvOpFunctionParameter:
992
if (currentFunction.resultId == UINT32_MAX) {
993
fprintf(stderr, "Found function parameter without a function start.\n");
994
return false;
995
}
996
997
currentFunction.functionWordCount += wordCount;
998
999
if (currentFunction.parameterCount == 0) {
1000
currentFunction.parameterIndex = uint32_t(functionParameters.size());
1001
}
1002
1003
functionParameters.emplace_back(dataWords[parseWordIndex + 2]);
1004
currentFunction.parameterCount++;
1005
break;
1006
case SpvOpFunctionCall:
1007
if (currentFunction.resultId == UINT32_MAX) {
1008
fprintf(stderr, "Found function call without a function start.\n");
1009
return false;
1010
}
1011
1012
currentFunction.codeWordCount += wordCount;
1013
1014
if (currentFunction.callCount == 0) {
1015
currentFunction.callIndex = uint32_t(functionCalls.size());
1016
}
1017
1018
functionCalls.emplace_back(parseWordIndex, dataWords[parseWordIndex + 3], sameBlockWordCount);
1019
currentFunction.callCount++;
1020
break;
1021
case SpvOpDecorate: {
1022
uint32_t resultId = dataWords[parseWordIndex + 1];
1023
if (resultId >= dataIdBound) {
1024
fprintf(stderr, "Found decoration with invalid result %u.\n", resultId);
1025
return false;
1026
}
1027
1028
uint32_t nextDecorationIndex = functionResultMap[resultId].decorationIndex;
1029
functionResultMap[resultId].decorationIndex = uint32_t(resultDecorations.size());
1030
resultDecorations.emplace_back(parseWordIndex, nextDecorationIndex);
1031
globalWordCount += wordCount;
1032
break;
1033
}
1034
case SpvOpVariable:
1035
if (currentFunction.resultId != UINT32_MAX) {
1036
// Identify the variable as a local function variable.
1037
uint32_t resultId = dataWords[parseWordIndex + 2];
1038
if (resultId >= dataIdBound) {
1039
fprintf(stderr, "Found variable with invalid result %u.\n", resultId);
1040
return false;
1041
}
1042
1043
currentFunction.variableWordCount += wordCount;
1044
}
1045
else {
1046
globalWordCount += wordCount;
1047
}
1048
1049
break;
1050
case SpvOpReturn:
1051
// Functions that use early returns while on a loop can't be inlined.
1052
if (!loopMergeIdStack.empty()) {
1053
currentFunction.canInline = false;
1054
}
1055
1056
// If inlined, an OpBranch is required to replace the return.
1057
currentFunction.inlineWordCount += 2;
1058
currentFunction.functionWordCount += wordCount;
1059
break;
1060
case SpvOpReturnValue:
1061
// Functions that use early returns while on a loop can't be inlined.
1062
if (!loopMergeIdStack.empty()) {
1063
currentFunction.canInline = false;
1064
}
1065
1066
// If inlined, an OpPhi with at least one argument is required to handle return values.
1067
if (currentFunction.returnValueCount == 1) {
1068
currentFunction.inlineWordCount += 5;
1069
}
1070
1071
currentFunction.returnValueCount++;
1072
1073
// An OpBranch is required to replace the return.
1074
currentFunction.inlineWordCount += 2;
1075
1076
// An argument in OpPhi is required if there's more than one return value.
1077
if (currentFunction.returnValueCount > 1) {
1078
currentFunction.inlineWordCount += 2;
1079
}
1080
1081
currentFunction.functionWordCount += wordCount;
1082
break;
1083
case SpvOpEntryPoint:
1084
if (entryPointFunctionId != UINT32_MAX) {
1085
fprintf(stderr, "Found more than one entry point, which is not yet supported.\n");
1086
return false;
1087
}
1088
1089
entryPointFunctionId = dataWords[parseWordIndex + 2];
1090
globalWordCount += wordCount;
1091
break;
1092
case SpvOpStore: {
1093
if (currentFunction.resultId == UINT32_MAX) {
1094
fprintf(stderr, "Found store outside of a function.\n");
1095
return false;
1096
}
1097
1098
currentFunction.codeWordCount += wordCount;
1099
break;
1100
}
1101
case SpvOpLabel: {
1102
if (currentFunction.resultId == UINT32_MAX) {
1103
fprintf(stderr, "Found label outside of a function.\n");
1104
return false;
1105
}
1106
1107
uint32_t labelId = dataWords[parseWordIndex + 1];
1108
if (!loopMergeIdStack.empty() && (loopMergeIdStack.back() == labelId)) {
1109
loopMergeIdStack.pop_back();
1110
}
1111
1112
currentFunction.codeWordCount += wordCount;
1113
sameBlockWordCount = 0;
1114
break;
1115
}
1116
case SpvOpLoopMerge: {
1117
if (currentFunction.resultId == UINT32_MAX) {
1118
fprintf(stderr, "Found loop outside of a function.\n");
1119
return false;
1120
}
1121
1122
uint32_t mergeId = dataWords[parseWordIndex + 1];
1123
loopMergeIdStack.emplace_back(mergeId);
1124
currentFunction.codeWordCount += wordCount;
1125
break;
1126
}
1127
case SpvOpImage:
1128
case SpvOpSampledImage: {
1129
if (currentFunction.resultId == UINT32_MAX) {
1130
fprintf(stderr, "Found loop outside of a function.\n");
1131
return false;
1132
}
1133
1134
sameBlockWordCount += wordCount;
1135
currentFunction.codeWordCount += wordCount;
1136
break;
1137
}
1138
default:
1139
if (currentFunction.resultId != UINT32_MAX) {
1140
currentFunction.codeWordCount += wordCount;
1141
}
1142
else {
1143
globalWordCount += wordCount;
1144
}
1145
1146
break;
1147
}
1148
1149
if (currentFunction.resultId != UINT32_MAX) {
1150
bool hasResult, hasType;
1151
SpvHasResultAndType(opCode, &hasResult, &hasType);
1152
1153
if (hasResult) {
1154
// Indicate the result is associated to a function.
1155
uint32_t resultId = dataWords[parseWordIndex + (hasType ? 2 : 1)];
1156
functionResultMap[resultId].wordIndex = parseWordIndex;
1157
1158
// Look for all decorations associated to this result. These will be skipped when rewriting
1159
// the shader and written back when the result is parsed again.
1160
uint32_t decorationIndex = functionResultMap[resultId].decorationIndex;
1161
while (decorationIndex != UINT32_MAX) {
1162
const ResultDecoration &decoration = resultDecorations[decorationIndex];
1163
uint32_t decorationWordCount = (dataWords[decoration.wordIndex] >> 16U) & 0xFFFFU;
1164
currentFunction.decorationWordCount += decorationWordCount;
1165
globalWordCount -= decorationWordCount;
1166
decorationIndex = decoration.nextDecorationIndex;
1167
}
1168
}
1169
}
1170
1171
parseWordIndex += wordCount;
1172
}
1173
1174
if (entryPointFunctionId == UINT32_MAX) {
1175
fprintf(stderr, "Unable to find function entry point.\n");
1176
return false;
1177
}
1178
1179
// Make sure function array is sorted to make lower bound searches possible.
1180
std::sort(functionDefinitions.begin(), functionDefinitions.end());
1181
1182
// Find the entry point function and mark that it shouldn't be inlined.
1183
FunctionDefinitionIterator entryFunctionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), entryPointFunctionId);
1184
if (entryFunctionIt == functionDefinitions.end()) {
1185
fprintf(stderr, "Unable to find entry point function %d.\n", entryPointFunctionId);
1186
return false;
1187
}
1188
1189
entryFunctionIt->canInline = false;
1190
1191
// Do a first iteration pass with the functions that can't be inlined as the starting points of the stack.
1192
// This pass will figure out the total size required for the final inlined shader.
1193
FunctionDefinitionIterator startFunctionIt = functionDefinitions.begin();
1194
while (startFunctionIt != functionDefinitions.end()) {
1195
if (!startFunctionIt->canInline) {
1196
functionStack.emplace_back(startFunctionIt, startFunctionIt, 0);
1197
}
1198
1199
startFunctionIt++;
1200
}
1201
1202
uint32_t codeWordCount = 0;
1203
uint32_t functionDecorationWordCount = 0;
1204
while (!functionStack.empty()) {
1205
FunctionItem &functionItem = functionStack.back();
1206
if (functionItem.callIndex == functionItem.function->callCount) {
1207
// Add this function's code and variables.
1208
codeWordCount += functionItem.function->codeWordCount;
1209
codeWordCount += functionItem.function->variableWordCount;
1210
functionDecorationWordCount += functionItem.function->decorationWordCount;
1211
1212
// This function will be inlined so its variables should be reserved on the parent function instead.
1213
if (functionItem.function->canInline) {
1214
codeWordCount += functionItem.function->inlineWordCount;
1215
functionItem.rootFunction->inlinedVariableWordCount += functionItem.function->variableWordCount;
1216
}
1217
// Only add the function's word counts if can't be inlined.
1218
else {
1219
codeWordCount += functionItem.function->functionWordCount;
1220
}
1221
1222
functionStack.pop_back();
1223
}
1224
else {
1225
// Traverse the function calls to be inlined
1226
const FunctionCall &functionCall = functionCalls[functionItem.function->callIndex + functionItem.callIndex];
1227
functionItem.callIndex++;
1228
1229
uint32_t callFunctionId = dataWords[functionCall.wordIndex + 3];
1230
FunctionDefinitionIterator callFunctionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), callFunctionId);
1231
if (callFunctionIt == functionDefinitions.end()) {
1232
fprintf(stderr, "Unable to find function %d.\n", callFunctionId);
1233
return false;
1234
}
1235
1236
if (callFunctionIt->canInline) {
1237
// Function call will be replaced by one OpLoopMerge, three OpLabel and three OpBranch.
1238
// All words required for preserving same block operations will also be added.
1239
// Subtract the word count for the function call as it'll not be copied.
1240
uint32_t callWordCount = (dataWords[functionCall.wordIndex] >> 16U) & 0xFFFFU;
1241
codeWordCount += 4 + 2 * 3 + 2 * 3;
1242
codeWordCount += functionCall.sameBlockWordCount;
1243
codeWordCount -= callWordCount;
1244
functionStack.emplace_back(callFunctionIt, functionItem.rootFunction, 0);
1245
}
1246
}
1247
}
1248
1249
// Figure out the total size of the shader and copy the header.
1250
size_t totalWordCount = SpvStartWordIndex + globalWordCount + codeWordCount + functionDecorationWordCount;
1251
inlinedSpirvWords.resize(totalWordCount);
1252
memcpy(inlinedSpirvWords.data(), pData, SpvStartWordIndex * sizeof(uint32_t));
1253
1254
// To avoid reallocation of these unless the shader really warrants it, we reserve some memory for these vectors.
1255
uint32_t &inlinedIdBound = inlinedSpirvWords[3];
1256
uint32_t dstWordIndex = SpvStartWordIndex;
1257
shaderResultMap.resize(dataIdBound, UINT32_MAX);
1258
storeMap.resize(dataIdBound, UINT32_MAX);
1259
loadMap.resize(dataIdBound, UINT32_MAX);
1260
phiMap.resize(dataIdBound, UINT32_MAX);
1261
1262
auto copyInstruction = [&](uint32_t dataWordIndex, bool renameResult, uint32_t &copyWordIndex, uint32_t &copyDecorationIndex) {
1263
copyDecorationIndex = UINT32_MAX;
1264
1265
SpvOp opCode = SpvOp(dataWords[dataWordIndex] & 0xFFFFU);
1266
uint32_t wordCount = (dataWords[dataWordIndex] >> 16U) & 0xFFFFU;
1267
for (uint32_t i = 0; i < wordCount; i++) {
1268
inlinedSpirvWords[copyWordIndex + i] = dataWords[dataWordIndex + i];
1269
}
1270
1271
bool hasResult, hasType;
1272
SpvHasResultAndType(opCode, &hasResult, &hasType);
1273
1274
if (hasResult) {
1275
// Any inlined functions must remap all their results and operands.
1276
uint32_t &resultId = inlinedSpirvWords[copyWordIndex + (hasType ? 2 : 1)];
1277
if ((resultId < dataIdBound) && (functionResultMap[resultId].wordIndex != UINT32_MAX)) {
1278
copyDecorationIndex = functionResultMap[resultId].decorationIndex;
1279
}
1280
1281
if (renameResult) {
1282
// First labels in a function will be replaced by the assigned label if present.
1283
uint32_t newResultId;
1284
if ((opCode == SpvOpLabel) && (callStack.back().startBlockId != UINT32_MAX) && !callStack.back().startBlockIdAssigned) {
1285
newResultId = callStack.back().startBlockId;
1286
callStack.back().startBlockIdAssigned = true;
1287
}
1288
else {
1289
newResultId = inlinedIdBound++;
1290
}
1291
1292
// Remap and replace the result ID in the instruction.
1293
shaderResultMap[resultId] = newResultId;
1294
resultId = newResultId;
1295
1296
// Store the current block's remapped label.
1297
if (opCode == SpvOpLabel) {
1298
callStack.back().blockId = resultId;
1299
}
1300
}
1301
}
1302
1303
// Remap any operands or labels present in the instructions.
1304
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
1305
bool operandWordSkipString;
1306
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
1307
uint32_t operandWordIndex = operandWordStart;
1308
for (uint32_t j = 0; j < operandWordCount; j++) {
1309
if (checkOperandWordSkip(callStack.back().wordIndex, dataWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
1310
continue;
1311
}
1312
1313
if (operandWordIndex >= wordCount) {
1314
break;
1315
}
1316
1317
uint32_t shaderWordIndex = copyWordIndex + operandWordIndex;
1318
uint32_t &operandId = inlinedSpirvWords[shaderWordIndex];
1319
1320
// Discard any known stores for variables that are used in operations that the effect is not explicitly considered yet.
1321
if ((opCode != SpvOpStore) && (opCode != SpvOpLoad)) {
1322
storeMap[operandId] = dataIdBound;
1323
}
1324
1325
// Rename the operand if it originates from a load.
1326
if (loadMap[operandId] < dataIdBound) {
1327
operandId = loadMap[operandId];
1328
}
1329
1330
// Apply the result remapping.
1331
if (shaderResultMap[operandId] != UINT32_MAX) {
1332
operandId = shaderResultMap[operandId];
1333
}
1334
1335
operandWordIndex += operandWordStride;
1336
}
1337
}
1338
1339
uint32_t labelWordStart, labelWordCount, labelWordStride;
1340
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, true)) {
1341
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
1342
uint32_t labelWordIndex = labelWordStart + j * labelWordStride;
1343
remapsPending.emplace_back(copyWordIndex + labelWordIndex);
1344
callStack.back().remapsPendingCount++;
1345
}
1346
}
1347
1348
copyWordIndex += wordCount;
1349
};
1350
1351
auto copyDecorations = [&](uint32_t copyDecorationIndex, uint32_t &copyWordIndex) {
1352
uint32_t placeholderWordIndex;
1353
while (copyDecorationIndex != UINT32_MAX) {
1354
copyInstruction(resultDecorations[copyDecorationIndex].wordIndex, false, copyWordIndex, placeholderWordIndex);
1355
copyDecorationIndex = resultDecorations[copyDecorationIndex].nextDecorationIndex;
1356
}
1357
};
1358
1359
// Perform the final pass for inlining all functions.
1360
uint32_t copyDecorationIndex;
1361
uint32_t dstInlinedDecorationWordIndex = UINT32_MAX;
1362
uint32_t dstInlinedDecorationWordIndexMax = UINT32_MAX;
1363
uint32_t dstInlinedVariableWordIndex = UINT32_MAX;
1364
uint32_t dstInlinedVariableWordIndexMax = UINT32_MAX;
1365
callStack.emplace_back(SpvStartWordIndex);
1366
while (!callStack.empty()) {
1367
uint32_t callWordIndex = callStack.back().wordIndex;
1368
if (callWordIndex >= dataWordCount) {
1369
break;
1370
}
1371
1372
bool copyWords = true;
1373
bool copyWordsToVariables = false;
1374
SpvOp opCode = SpvOp(dataWords[callWordIndex] & 0xFFFFU);
1375
uint32_t wordCount = (dataWords[callWordIndex] >> 16U) & 0xFFFFU;
1376
if (wordCount == 0) {
1377
fprintf(stderr, "Function iteration landed in an invalid instruction due to an implementation error.\n");
1378
return false;
1379
}
1380
1381
switch (opCode) {
1382
case SpvOpLabel:
1383
while (!storeMapChanges.empty()) {
1384
storeMap[storeMapChanges.back()] = UINT32_MAX;
1385
storeMapChanges.pop_back();
1386
}
1387
1388
while (!loadMapChanges.empty()) {
1389
loadMap[loadMapChanges.back()] = UINT32_MAX;
1390
loadMapChanges.pop_back();
1391
}
1392
1393
sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount);
1394
callStack.back().blockId = dataWords[callWordIndex + 1];
1395
callStack.back().sameBlockOperationsCount = 0;
1396
break;
1397
case SpvOpFunction: {
1398
uint32_t functionId = dataWords[callWordIndex + 2];
1399
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), functionId);
1400
if (functionIt == functionDefinitions.end()) {
1401
fprintf(stderr, "Unable to find function %d.\n", functionId);
1402
return false;
1403
}
1404
1405
// If we're iterating on the top of the shader, we skip over the function.
1406
// Only copy the function's words if it's not inlined and we're iterating on it.
1407
if (callStack.back().functionId == UINT32_MAX) {
1408
// Skip parsing the entire function on this stack level.
1409
callStack.back().wordIndex += functionIt->wordCount;
1410
1411
// Insert a new stack level if we found function that isn't inlined.
1412
if (!functionIt->canInline) {
1413
callStack.emplace_back(callWordIndex - wordCount, functionId);
1414
}
1415
else {
1416
callStack.back().wordIndex -= wordCount;
1417
}
1418
1419
copyWords = false;
1420
}
1421
else {
1422
copyWords = !functionIt->canInline;
1423
}
1424
1425
break;
1426
}
1427
case SpvOpFunctionParameter:
1428
// Only copy the function's parameters if it's not inlined.
1429
copyWords = !callStack.back().functionInlined;
1430
break;
1431
case SpvOpFunctionEnd: {
1432
// Apply any pending remappings from instructions with labels.
1433
for (size_t i = remapsPending.size() - callStack.back().remapsPendingCount; i < remapsPending.size(); i++) {
1434
uint32_t &resultId = inlinedSpirvWords[remapsPending[i]];
1435
if (shaderResultMap[resultId] != UINT32_MAX) {
1436
resultId = shaderResultMap[resultId];
1437
}
1438
}
1439
1440
// Only copy the function's end if it's not inlined.
1441
if (!callStack.back().functionInlined) {
1442
copyWords = true;
1443
1444
if (dstInlinedVariableWordIndex != dstInlinedVariableWordIndexMax) {
1445
fprintf(stderr, "Failed to fill all available variable space due to an implementation error.\n");
1446
return false;
1447
}
1448
1449
dstInlinedVariableWordIndex = UINT32_MAX;
1450
dstInlinedVariableWordIndexMax = UINT32_MAX;
1451
}
1452
else {
1453
// Insert a label for the continue block that connects back to the start along with a branch.
1454
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1455
inlinedSpirvWords[dstWordIndex++] = callStack.back().continueBlockId;
1456
1457
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1458
inlinedSpirvWords[dstWordIndex++] = callStack.back().loopBlockId;
1459
1460
// Insert a label for the return block.
1461
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1462
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1463
1464
// If the function only returns one possible value, the caller instead will just remap the result to this one.
1465
if (callStack.back().returnParametersCount == 2) {
1466
uint32_t functionResultId = callStack.back().resultId;
1467
shaderResultMap[functionResultId] = returnParameters[returnParameters.size() - callStack.back().returnParametersCount];
1468
}
1469
// Insert an OpPhi for selecting the result from a function call that called a function that returns multiple values.
1470
else if (callStack.back().returnParametersCount > 2) {
1471
// Remap the function result if necessary.
1472
const CallItem &previousCallStack = callStack[callStack.size() - 2];
1473
uint32_t functionResultId = callStack.back().resultId;
1474
if ((previousCallStack.functionId != UINT32_MAX) && previousCallStack.functionInlined) {
1475
uint32_t newFunctionResultId = inlinedIdBound++;
1476
shaderResultMap[functionResultId] = newFunctionResultId;
1477
functionResultId = newFunctionResultId;
1478
}
1479
1480
opPhis.emplace_back(dstWordIndex);
1481
inlinedSpirvWords[dstWordIndex++] = SpvOpPhi | ((3 + callStack.back().returnParametersCount) << 16U);
1482
inlinedSpirvWords[dstWordIndex++] = callStack.back().resultType;
1483
inlinedSpirvWords[dstWordIndex++] = functionResultId;
1484
1485
// Copy the OpPhi arguments directly.
1486
for (size_t i = returnParameters.size() - callStack.back().returnParametersCount; i < returnParameters.size(); i++) {
1487
inlinedSpirvWords[dstWordIndex++] = returnParameters[i];
1488
}
1489
}
1490
1491
copyWords = false;
1492
}
1493
1494
// Pop this stack level and return to iterating on the previous one.
1495
remapsPending.resize(remapsPending.size() - callStack.back().remapsPendingCount);
1496
returnParameters.resize(returnParameters.size() - callStack.back().returnParametersCount);
1497
sameBlockOperations.resize(sameBlockOperations.size() - callStack.back().sameBlockOperationsCount);
1498
callStack.pop_back();
1499
1500
if (!callStack.empty()) {
1501
// Copy the same block operations and rename the results even if the function wasn't inlined.
1502
for (size_t i = sameBlockOperations.size() - callStack.back().sameBlockOperationsCount; i < sameBlockOperations.size(); i++) {
1503
copyInstruction(sameBlockOperations[i], true, dstWordIndex, copyDecorationIndex);
1504
copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex);
1505
}
1506
1507
callStack.back().wordIndex -= wordCount;
1508
}
1509
1510
break;
1511
}
1512
case SpvOpFunctionCall: {
1513
// Inline the function by inserting two labels and a branch.
1514
uint32_t functionId = dataWords[callWordIndex + 3];
1515
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), functionId);
1516
if (functionIt == functionDefinitions.end()) {
1517
fprintf(stderr, "Unable to find function %d.\n", functionId);
1518
return false;
1519
}
1520
1521
if (functionIt->canInline) {
1522
// Generate the ID that will be used to indicate the function's start and the return block.
1523
uint32_t loopLabelId = inlinedIdBound++;
1524
uint32_t startLabelId = inlinedIdBound++;
1525
uint32_t continueLabelId = inlinedIdBound++;
1526
uint32_t returnLabelId = inlinedIdBound++;
1527
1528
// In any future Phi operations, rename the current label to the return label.
1529
if (callStack.back().blockId >= phiMap.size()) {
1530
phiMap.resize(callStack.back().blockId + 1, UINT32_MAX);
1531
}
1532
1533
phiMap[callStack.back().blockId] = returnLabelId;
1534
1535
// Branch into a new block. The new block will contain a single iteration loop.
1536
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1537
inlinedSpirvWords[dstWordIndex++] = loopLabelId;
1538
1539
inlinedSpirvWords[dstWordIndex++] = SpvOpLabel | (2 << 16U);
1540
inlinedSpirvWords[dstWordIndex++] = loopLabelId;
1541
1542
inlinedSpirvWords[dstWordIndex++] = SpvOpLoopMerge | (4 << 16U);
1543
inlinedSpirvWords[dstWordIndex++] = returnLabelId;
1544
inlinedSpirvWords[dstWordIndex++] = continueLabelId;
1545
inlinedSpirvWords[dstWordIndex++] = SpvLoopControlMaskNone;
1546
1547
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1548
inlinedSpirvWords[dstWordIndex++] = startLabelId;
1549
1550
// Pass the result Id unmodified. The function evaluation will determine how it should be remapped.
1551
uint32_t functionResultId = dataWords[callWordIndex + 2];
1552
callStack.back().wordIndex += wordCount;
1553
1554
// Word count should be subtracted as the loop's end will add it.
1555
callStack.emplace_back(functionIt->wordIndex - wordCount, functionIt->resultId, true, startLabelId, loopLabelId, continueLabelId, returnLabelId, dataWords[callWordIndex + 1], functionResultId);
1556
1557
for (uint32_t i = 0; i < functionIt->parameterCount; i++) {
1558
if (wordCount <= (4 + i)) {
1559
fprintf(stderr, "Not enough words for argument %d in function call.\n", i);
1560
return false;
1561
}
1562
1563
uint32_t functionParameterId = functionParameters[functionIt->parameterIndex + i].resultId;
1564
uint32_t localParameterId = dataWords[callWordIndex + 4 + i];
1565
if (shaderResultMap[localParameterId] != UINT32_MAX) {
1566
localParameterId = shaderResultMap[localParameterId];
1567
}
1568
1569
shaderResultMap[functionParameterId] = localParameterId;
1570
}
1571
1572
copyWords = false;
1573
}
1574
else {
1575
copyWords = true;
1576
}
1577
1578
break;
1579
}
1580
case SpvOpDecorate: {
1581
if (dstInlinedDecorationWordIndex == UINT32_MAX) {
1582
// Upon encountering the first decoration in the shader, reserve space to write out any decorations
1583
// that are found to be linked to function results.
1584
dstInlinedDecorationWordIndex = dstWordIndex;
1585
dstWordIndex += functionDecorationWordCount;
1586
dstInlinedDecorationWordIndexMax = dstWordIndex;
1587
}
1588
1589
// Only copy the decoration as-is if it doesn't belong to a result in a function.
1590
uint32_t resultId = dataWords[callWordIndex + 1];
1591
copyWords = (functionResultMap[resultId].wordIndex == UINT32_MAX);
1592
break;
1593
}
1594
case SpvOpVariable:
1595
if ((callStack.back().functionId < UINT32_MAX) && !callStack.back().functionInlined) {
1596
// As soon as we find a variable local to the function, reserve the space to insert all
1597
// inlined function variables that we encounter.
1598
if (dstInlinedVariableWordIndex == UINT32_MAX) {
1599
FunctionDefinitionIterator functionIt = std::lower_bound(functionDefinitions.begin(), functionDefinitions.end(), callStack.back().functionId);
1600
if (functionIt == functionDefinitions.end()) {
1601
fprintf(stderr, "Unable to find function %d.\n", callStack.back().functionId);
1602
return false;
1603
}
1604
1605
dstInlinedVariableWordIndex = dstWordIndex;
1606
dstWordIndex += functionIt->inlinedVariableWordCount;
1607
dstInlinedVariableWordIndexMax = dstWordIndex;
1608
}
1609
}
1610
else {
1611
// Copy the variables into the entry point function's variables.
1612
copyWordsToVariables = (callStack.back().functionId != UINT32_MAX);
1613
}
1614
1615
copyWords = true;
1616
break;
1617
case SpvOpReturn:
1618
if (callStack.back().functionInlined) {
1619
// Replace return with a branch to the return label.
1620
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1621
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1622
copyWords = false;
1623
}
1624
else {
1625
// Copy as is.
1626
}
1627
1628
break;
1629
case SpvOpReturnValue: {
1630
if (callStack.back().functionInlined) {
1631
// Replace return with a branch to the return label.
1632
inlinedSpirvWords[dstWordIndex++] = SpvOpBranch | (2 << 16U);
1633
inlinedSpirvWords[dstWordIndex++] = callStack.back().returnBlockId;
1634
copyWords = false;
1635
1636
// Store parameters for Phi operator.
1637
uint32_t operandId = dataWords[callStack.back().wordIndex + 1];
1638
if (shaderResultMap[operandId] != UINT32_MAX) {
1639
operandId = shaderResultMap[operandId];
1640
}
1641
1642
returnParameters.emplace_back(operandId);
1643
returnParameters.emplace_back(callStack.back().blockId);
1644
callStack.back().returnParametersCount += 2;
1645
}
1646
else {
1647
// Copy as is.
1648
}
1649
1650
break;
1651
}
1652
case SpvOpLoad: {
1653
// If the pointer being loaded was modified this block, store its result to rename the
1654
// operands that use the result of this load operation. This load operation will go
1655
// unused and be deleted in the optimization pass.
1656
// Ignore load operations with memory operands.
1657
if (wordCount == 4) {
1658
uint32_t pointerId = dataWords[callStack.back().wordIndex + 3];
1659
if (pointerId >= dataIdBound) {
1660
fprintf(stderr, "Found load operation with invalid pointer %u.\n", pointerId);
1661
return false;
1662
}
1663
1664
uint32_t pointerWordIndex = functionResultMap[pointerId].wordIndex;
1665
if ((pointerWordIndex != UINT32_MAX) && (SpvOp(dataWords[pointerWordIndex] & 0xFFFFU) == SpvOpVariable) && (storeMap[pointerId] < dataIdBound)) {
1666
uint32_t resultId = dataWords[callStack.back().wordIndex + 2];
1667
if (loadMap[resultId] != storeMap[pointerId]) {
1668
loadMap[resultId] = storeMap[pointerId];
1669
loadMapChanges.emplace_back(resultId);
1670
}
1671
}
1672
}
1673
1674
break;
1675
}
1676
case SpvOpStore: {
1677
// Keep track of the result last stored to the pointer on this block.
1678
// Ignore store operations with memory operands.
1679
if (wordCount == 3) {
1680
uint32_t pointerId = dataWords[callStack.back().wordIndex + 1];
1681
if (pointerId >= dataIdBound) {
1682
fprintf(stderr, "Found store operation with invalid pointer %u.\n", pointerId);
1683
return false;
1684
}
1685
1686
uint32_t resultId = dataWords[callStack.back().wordIndex + 2];
1687
if (resultId >= dataIdBound) {
1688
fprintf(stderr, "Found store operation with invalid result %u.\n", resultId);
1689
return false;
1690
}
1691
1692
if (storeMap[pointerId] != resultId) {
1693
storeMap[pointerId] = resultId;
1694
storeMapChanges.emplace_back(pointerId);
1695
}
1696
}
1697
1698
break;
1699
}
1700
case SpvOpPhi:
1701
opPhis.emplace_back(dstWordIndex);
1702
break;
1703
case SpvOpImage:
1704
case SpvOpSampledImage: {
1705
sameBlockOperations.emplace_back(callStack.back().wordIndex);
1706
callStack.back().sameBlockOperationsCount++;
1707
break;
1708
}
1709
default:
1710
break;
1711
}
1712
1713
if (copyWords) {
1714
uint32_t &copyWordIndex = copyWordsToVariables ? dstInlinedVariableWordIndex : dstWordIndex;
1715
copyInstruction(callWordIndex, callStack.back().functionInlined, copyWordIndex, copyDecorationIndex);
1716
copyDecorations(copyDecorationIndex, dstInlinedDecorationWordIndex);
1717
}
1718
1719
if (!callStack.empty()) {
1720
callStack.back().wordIndex += wordCount;
1721
}
1722
1723
assert(dstWordIndex <= totalWordCount && "Not enough words were reserved for the shader.");
1724
assert(dstInlinedVariableWordIndex <= dstInlinedVariableWordIndexMax && "Not enough words were reserved for inlined variables.");
1725
assert(dstInlinedDecorationWordIndex <= dstInlinedDecorationWordIndexMax && "Not enough words were reserved for function decorations.");
1726
}
1727
1728
if (dstWordIndex != totalWordCount) {
1729
fprintf(stderr, "Failed to fill all shader data due to an implementation error.\n");
1730
return false;
1731
}
1732
1733
// Fix any OpPhi operators with the labels for the blocks that were split.
1734
for (uint32_t wordIndex : opPhis) {
1735
uint32_t wordCount = (inlinedSpirvWords[wordIndex] >> 16U) & 0xFFFFU;
1736
for (uint32_t j = 3; j < wordCount; j += 2) {
1737
uint32_t &labelId = inlinedSpirvWords[wordIndex + j + 1];
1738
while ((phiMap.size() > labelId) && (phiMap[labelId] != UINT32_MAX)) {
1739
labelId = phiMap[labelId];
1740
}
1741
}
1742
}
1743
1744
return true;
1745
}
1746
1747
bool Shader::parseData(const void *pData, size_t pSize) {
1748
assert(pData != nullptr);
1749
assert(pSize > 0);
1750
1751
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
1752
const size_t dataWordCount = pSize / sizeof(uint32_t);
1753
const uint32_t idBound = dataWords[3];
1754
instructions.reserve(idBound);
1755
results.resize(idBound, Result());
1756
results.shrink_to_fit();
1757
1758
// Parse all instructions.
1759
uint32_t blockIndex = UINT32_MAX;
1760
uint32_t functionInstructionIndex = UINT32_MAX;
1761
uint32_t functionLabelIndex = UINT32_MAX;
1762
uint32_t blockInstructionIndex = UINT32_MAX;
1763
uint32_t wordIndex = SpvStartWordIndex;
1764
while (wordIndex < dataWordCount) {
1765
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
1766
uint32_t wordCount = (dataWords[wordIndex] >> 16U) & 0xFFFFU;
1767
if (wordCount == 0) {
1768
fprintf(stderr, "SPIR-V Parsing error. Invalid instruction word count at word %d.\n", wordIndex);
1769
return false;
1770
}
1771
1772
bool hasResult, hasType;
1773
SpvHasResultAndType(opCode, &hasResult, &hasType);
1774
1775
uint32_t instructionIndex = uint32_t(instructions.size());
1776
if (hasResult) {
1777
uint32_t resultId = dataWords[wordIndex + (hasType ? 2 : 1)];
1778
if (resultId >= idBound) {
1779
fprintf(stderr, "SPIR-V Parsing error. Invalid Result ID: %u.\n", resultId);
1780
return false;
1781
}
1782
1783
results[resultId].instructionIndex = instructionIndex;
1784
}
1785
1786
// Handle specific instructions.
1787
switch (opCode) {
1788
case SpvOpFunction:
1789
functionInstructionIndex = instructionIndex;
1790
break;
1791
case SpvOpFunctionEnd:
1792
functions.emplace_back(functionInstructionIndex, functionLabelIndex);
1793
functionInstructionIndex = functionLabelIndex = UINT32_MAX;
1794
break;
1795
case SpvOpDecorate:
1796
case SpvOpMemberDecorate:
1797
decorations.emplace_back(instructionIndex);
1798
break;
1799
case SpvOpPhi:
1800
phis.emplace_back(instructionIndex);
1801
break;
1802
case SpvOpLoopMerge:
1803
loopHeaders.emplace_back(instructionIndex, blockInstructionIndex);
1804
break;
1805
case SpvOpLabel:
1806
blockIndex = uint32_t(blocks.size());
1807
blockInstructionIndex = instructionIndex;
1808
1809
if (functionLabelIndex == UINT32_MAX) {
1810
functionLabelIndex = blockInstructionIndex;
1811
}
1812
1813
break;
1814
default:
1815
break;
1816
}
1817
1818
instructions.emplace_back(wordIndex, blockIndex);
1819
1820
if (SpvOpIsTerminator(opCode)) {
1821
blocks.emplace_back(blockInstructionIndex, instructionIndex);
1822
blockIndex = UINT32_MAX;
1823
blockInstructionIndex = UINT32_MAX;
1824
}
1825
1826
wordIndex += wordCount;
1827
}
1828
1829
// Initialize all adjacent indices for the lists.
1830
instructionAdjacentListIndices.resize(instructions.size(), UINT32_MAX);
1831
1832
return true;
1833
}
1834
1835
bool Shader::process(const void *pData, size_t pSize) {
1836
// Greatly decreases the costs of adding nodes to the linked list.
1837
listNodes.reserve(instructions.size() * 2);
1838
1839
thread_local std::vector<uint32_t> loopMergeBlockStack;
1840
thread_local std::vector<uint32_t> loopMergeInstructionStack;
1841
thread_local std::vector<bool> preOrderVisitedBlocks;
1842
thread_local std::vector<bool> postOrderVisitedBlocks;
1843
loopMergeBlockStack.clear();
1844
loopMergeInstructionStack.clear();
1845
preOrderVisitedBlocks.clear();
1846
postOrderVisitedBlocks.clear();
1847
1848
bool foundOpSwitch = false;
1849
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
1850
const size_t dataWordCount = pSize / sizeof(uint32_t);
1851
uint32_t currentBlockId = 0;
1852
uint32_t currentLoopHeaderIndex = 0;
1853
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
1854
uint32_t wordIndex = instructions[i].wordIndex;
1855
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
1856
uint32_t wordCount = (dataWords[wordIndex] >> 16U) & 0xFFFFU;
1857
if (!SpvIsSupported(opCode)) {
1858
fprintf(stderr, "%s is not supported yet.\n", SpvOpToString(opCode));
1859
return false;
1860
}
1861
1862
bool hasResult, hasType;
1863
SpvHasResultAndType(opCode, &hasResult, &hasType);
1864
1865
if (hasType) {
1866
uint32_t typeId = dataWords[wordIndex + 1];
1867
if (typeId >= results.size()) {
1868
fprintf(stderr, "SPIR-V Parsing error. Invalid Type ID: %u.\n", typeId);
1869
return false;
1870
}
1871
1872
if (results[typeId].instructionIndex == UINT32_MAX) {
1873
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", typeId);
1874
return false;
1875
}
1876
1877
uint32_t typeInstructionIndex = results[typeId].instructionIndex;
1878
instructionAdjacentListIndices[typeInstructionIndex] = addToList(i, instructionAdjacentListIndices[typeInstructionIndex], listNodes);
1879
1880
// Check if it's an OpConstant of Int type so it can be reused on switches.
1881
if ((opCode == SpvOpConstant) && (defaultSwitchOpConstantInt == UINT32_MAX)) {
1882
uint32_t typeWordIndex = instructions[typeInstructionIndex].wordIndex;
1883
SpvOp typeOpCode = SpvOp(dataWords[typeWordIndex] & 0xFFFFU);
1884
if (typeOpCode == SpvOpTypeInt) {
1885
defaultSwitchOpConstantInt = dataWords[wordIndex + 2];
1886
}
1887
}
1888
}
1889
1890
// Every operand should be adjacent to this instruction.
1891
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
1892
bool operandWordSkipString;
1893
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, false)) {
1894
uint32_t operandWordIndex = operandWordStart;
1895
for (uint32_t j = 0; j < operandWordCount; j++) {
1896
if (checkOperandWordSkip(wordIndex, dataWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
1897
continue;
1898
}
1899
1900
if (operandWordIndex >= wordCount) {
1901
break;
1902
}
1903
1904
uint32_t operandId = dataWords[wordIndex + operandWordIndex];
1905
if (operandId >= results.size()) {
1906
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", operandId);
1907
return false;
1908
}
1909
1910
if (results[operandId].instructionIndex == UINT32_MAX) {
1911
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", operandId);
1912
return false;
1913
}
1914
1915
uint32_t resultIndex = results[operandId].instructionIndex;
1916
instructionAdjacentListIndices[resultIndex] = addToList(i, instructionAdjacentListIndices[resultIndex], listNodes);
1917
operandWordIndex += operandWordStride;
1918
}
1919
}
1920
else {
1921
fprintf(stderr, "SPIR-V Parsing error. Operands for %s are not implemented yet.\n", SpvOpToString(opCode));
1922
return false;
1923
}
1924
1925
// This instruction should be adjacent to every label referenced.
1926
uint32_t labelWordStart, labelWordCount, labelWordStride;
1927
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, false)) {
1928
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
1929
uint32_t labelId = dataWords[wordIndex + labelWordStart + j * labelWordStride];
1930
if (labelId >= results.size()) {
1931
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", labelId);
1932
return false;
1933
}
1934
1935
if (results[labelId].instructionIndex == UINT32_MAX) {
1936
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", labelId);
1937
return false;
1938
}
1939
1940
// Make sure this label not pointing back to the loop header while on a loop merge.
1941
if (!loopMergeBlockStack.empty() && (labelId == loopMergeBlockStack.back())) {
1942
continue;
1943
}
1944
1945
uint32_t labelIndex = results[labelId].instructionIndex;
1946
instructionAdjacentListIndices[i] = addToList(labelIndex, instructionAdjacentListIndices[i], listNodes);
1947
}
1948
}
1949
1950
// Parse parented blocks of OpPhi to indicate the dependency.
1951
if (opCode == SpvOpPhi) {
1952
uint32_t continueLabelId = UINT32_MAX;
1953
if (!loopMergeInstructionStack.empty()) {
1954
uint32_t loopMergeWordIndex = instructions[loopMergeInstructionStack.back()].wordIndex;
1955
continueLabelId = dataWords[loopMergeWordIndex + 2];
1956
}
1957
1958
for (uint32_t j = 3; j < wordCount; j += 2) {
1959
uint32_t labelId = dataWords[wordIndex + j + 1];
1960
if (labelId >= results.size()) {
1961
fprintf(stderr, "SPIR-V Parsing error. Invalid Parent ID: %u.\n", labelId);
1962
return false;
1963
}
1964
1965
if (results[labelId].instructionIndex == UINT32_MAX) {
1966
fprintf(stderr, "SPIR-V Parsing error. Invalid Parent ID: %u.\n", labelId);
1967
return false;
1968
}
1969
1970
// Make sure this label doesn't come from the loop continue.
1971
if (labelId == continueLabelId) {
1972
continue;
1973
}
1974
1975
uint32_t operandId = dataWords[wordIndex + j];
1976
if (operandId >= results.size()) {
1977
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", operandId);
1978
return false;
1979
}
1980
1981
if (results[operandId].instructionIndex == UINT32_MAX) {
1982
fprintf(stderr, "SPIR-V Parsing error. Result %u is not valid.\n", operandId);
1983
return false;
1984
}
1985
1986
uint32_t labelIndex = results[labelId].instructionIndex;
1987
uint32_t resultIndex = results[operandId].instructionIndex;
1988
instructionAdjacentListIndices[labelIndex] = addToList(i, instructionAdjacentListIndices[labelIndex], listNodes);
1989
instructionAdjacentListIndices[resultIndex] = addToList(i, instructionAdjacentListIndices[resultIndex], listNodes);
1990
}
1991
}
1992
// Parse decorations.
1993
else if (opCode == SpvOpDecorate) {
1994
uint32_t decoration = dataWords[wordIndex + 2];
1995
if (decoration == SpvDecorationSpecId) {
1996
uint32_t resultId = dataWords[wordIndex + 1];
1997
uint32_t constantId = dataWords[wordIndex + 3];
1998
if (resultId >= results.size()) {
1999
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", resultId);
2000
return false;
2001
}
2002
2003
uint32_t resultInstructionIndex = results[resultId].instructionIndex;
2004
if (resultInstructionIndex == UINT32_MAX) {
2005
fprintf(stderr, "SPIR-V Parsing error. Invalid Operand ID: %u.\n", resultId);
2006
return false;
2007
}
2008
2009
specializations.resize(std::max(specializations.size(), size_t(constantId + 1)));
2010
specializations[constantId].constantInstructionIndex = resultInstructionIndex;
2011
specializations[constantId].decorationInstructionIndex = i;
2012
}
2013
}
2014
// Check if a switch is used in the shader.
2015
else if (opCode == SpvOpSwitch) {
2016
foundOpSwitch = true;
2017
}
2018
// If a loop merge stack is active, pop it if it corresponds to the merge block.
2019
else if (opCode == SpvOpLabel) {
2020
currentBlockId = dataWords[wordIndex + 1];
2021
2022
if ((currentLoopHeaderIndex < loopHeaders.size()) && (i == loopHeaders[currentLoopHeaderIndex].blockInstructionIndex)) {
2023
loopMergeBlockStack.emplace_back(currentBlockId);
2024
loopMergeInstructionStack.emplace_back(loopHeaders[currentLoopHeaderIndex].instructionIndex);
2025
currentLoopHeaderIndex++;
2026
}
2027
2028
if (!loopMergeBlockStack.empty() && !loopMergeInstructionStack.empty()) {
2029
uint32_t loopMergeWordIndex = instructions[loopMergeInstructionStack.back()].wordIndex;
2030
uint32_t mergeBlockId = dataWords[loopMergeWordIndex + 1];
2031
if (currentBlockId == mergeBlockId) {
2032
loopMergeBlockStack.pop_back();
2033
loopMergeInstructionStack.pop_back();
2034
}
2035
}
2036
}
2037
}
2038
2039
// Do a pre-order and post-order traversal of the tree starting from each function. These indices are
2040
// later used to figure out whether instructions dominate other instructions when doing optimizations.
2041
thread_local std::vector<uint32_t> blockIndexStack;
2042
thread_local std::vector<uint32_t> blockAdjacentStack;
2043
uint32_t preOrderIndex = 0;
2044
uint32_t postOrderIndex = 0;
2045
blockPreOrderIndices.resize(blocks.size(), 0);
2046
blockPostOrderIndices.resize(blocks.size(), 0);
2047
preOrderVisitedBlocks.resize(blocks.size(), false);
2048
postOrderVisitedBlocks.resize(blocks.size(), false);
2049
for (uint32_t i = 0; i < uint32_t(functions.size()); i++) {
2050
const Function &function = functions[i];
2051
const Instruction &functionLabelInstruction = instructions[function.labelInstructionIndex];
2052
blockIndexStack.clear();
2053
blockAdjacentStack.clear();
2054
blockIndexStack.emplace_back(functionLabelInstruction.blockIndex);
2055
blockAdjacentStack.emplace_back(UINT32_MAX);
2056
while (!blockIndexStack.empty()) {
2057
uint32_t blockIndex = blockIndexStack.back();
2058
uint32_t blockAdjacentIndex = blockAdjacentStack.back();
2059
blockIndexStack.pop_back();
2060
blockAdjacentStack.pop_back();
2061
2062
uint32_t terminatorInstructorIndex = blocks[blockIndex].terminatorInstructionIndex;
2063
if (!preOrderVisitedBlocks[blockIndex]) {
2064
blockPreOrderIndices[blockIndex] = preOrderIndex++;
2065
blockAdjacentIndex = instructionAdjacentListIndices[terminatorInstructorIndex];
2066
preOrderVisitedBlocks[blockIndex] = true;
2067
}
2068
2069
if ((blockAdjacentIndex == UINT32_MAX) && !postOrderVisitedBlocks[blockIndex]) {
2070
blockPostOrderIndices[blockIndex] = postOrderIndex++;
2071
postOrderVisitedBlocks[blockIndex] = true;
2072
}
2073
2074
while (blockAdjacentIndex != UINT32_MAX) {
2075
const ListNode &adjacentListNode = listNodes[blockAdjacentIndex];
2076
const Instruction &adjacentInstruction = instructions[adjacentListNode.instructionIndex];
2077
SpvOp adjacentOpCode = SpvOp(dataWords[adjacentInstruction.wordIndex] & 0xFFFFU);
2078
if (adjacentOpCode == SpvOpLabel) {
2079
blockIndexStack.emplace_back(blockIndex);
2080
blockAdjacentStack.emplace_back(adjacentListNode.nextListIndex);
2081
blockIndexStack.emplace_back(adjacentInstruction.blockIndex);
2082
blockAdjacentStack.emplace_back(UINT32_MAX);
2083
blockAdjacentIndex = UINT32_MAX;
2084
}
2085
else {
2086
blockAdjacentIndex = adjacentListNode.nextListIndex;
2087
}
2088
}
2089
}
2090
}
2091
2092
if (foundOpSwitch && (defaultSwitchOpConstantInt == UINT32_MAX)) {
2093
fprintf(stderr, "Unable to find an OpConstantInt to use as replacement for switches. Adding this instruction automatically is not supported yet.\n");
2094
return false;
2095
}
2096
2097
return true;
2098
}
2099
2100
// Sort key used to order instructions by dependency level first and by original instruction index second.
struct InstructionSort {
    union {
        struct {
            uint64_t instructionIndex : 32;
            uint64_t instructionLevel : 32;
        };

        // Aliases both fields; its default initializer zeroes them together.
        uint64_t instructionValue = 0;
    };

    InstructionSort() {
        // Empty.
    }

    // Orders by level, then by index. The fields are compared explicitly instead of comparing the aliased
    // 64-bit value, because which bit-field lands in the most significant half is implementation-defined.
    bool operator<(const InstructionSort &i) const {
        if (instructionLevel != i.instructionLevel) {
            return instructionLevel < i.instructionLevel;
        }

        return instructionIndex < i.instructionIndex;
    }
};
2118
2119
bool Shader::sort(const void *pData, size_t pSize) {
2120
const uint32_t *dataWords = reinterpret_cast<const uint32_t *>(pData);
2121
const size_t dataWordCount = pSize / sizeof(uint32_t);
2122
2123
// Count the in and out degrees for all instructions.
2124
instructionInDegrees.clear();
2125
instructionOutDegrees.clear();
2126
instructionInDegrees.resize(instructions.size(), 0);
2127
instructionOutDegrees.resize(instructions.size(), 0);
2128
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2129
uint32_t listIndex = instructionAdjacentListIndices[i];
2130
while (listIndex != UINT32_MAX) {
2131
const ListNode &listNode = listNodes[listIndex];
2132
instructionInDegrees[listNode.instructionIndex]++;
2133
instructionOutDegrees[i]++;
2134
listIndex = listNode.nextListIndex;
2135
}
2136
}
2137
2138
// Sort degrees doesn't need to be cleared as its contents will be copied over.
2139
thread_local std::vector<uint32_t> sortDegrees;
2140
thread_local std::vector<uint32_t> instructionStack;
2141
thread_local std::vector<InstructionSort> instructionSortVector;
2142
instructionStack.clear();
2143
instructionSortVector.clear();
2144
2145
// Make a copy of the degrees as they'll be used to perform a topological sort.
2146
sortDegrees.resize(instructionInDegrees.size());
2147
memcpy(sortDegrees.data(), instructionInDegrees.data(), sizeof(uint32_t) * sortDegrees.size());
2148
2149
// The first nodes to be processed should be the ones with no incoming connections.
2150
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2151
if (sortDegrees[i] == 0) {
2152
instructionStack.emplace_back(i);
2153
}
2154
}
2155
2156
instructionOrder.reserve(instructions.size());
2157
instructionOrder.clear();
2158
while (!instructionStack.empty()) {
2159
uint32_t i = instructionStack.back();
2160
instructionStack.pop_back();
2161
instructionOrder.emplace_back(i);
2162
2163
// Look for the adjacents and reduce their degree. Push it to the stack if their degree reaches zero.
2164
uint32_t listIndex = instructionAdjacentListIndices[i];
2165
while (listIndex != UINT32_MAX) {
2166
const ListNode &listNode = listNodes[listIndex];
2167
uint32_t &sortDegree = sortDegrees[listNode.instructionIndex];
2168
assert(sortDegree > 0);
2169
sortDegree--;
2170
if (sortDegree == 0) {
2171
instructionStack.emplace_back(listNode.instructionIndex);
2172
}
2173
2174
listIndex = listNode.nextListIndex;
2175
}
2176
}
2177
2178
if (instructionOrder.size() < instructions.size()) {
2179
fprintf(stderr, "Sorting shader failed. Not all instructions could be reached.\n");
2180
#if RESPV_VERBOSE_ERRORS
2181
for (uint32_t i = 0; i < uint32_t(instructions.size()); i++) {
2182
if (sortDegrees[i] != 0) {
2183
fprintf(stderr, "[%d] Remaining Degrees %d\n", i, sortDegrees[i]);
2184
}
2185
}
2186
#endif
2187
return false;
2188
}
2189
2190
instructionSortVector.resize(instructionOrder.size(), InstructionSort());
2191
for (uint32_t instructionIndex : instructionOrder) {
2192
uint64_t nextLevel = instructionSortVector[instructionIndex].instructionLevel + 1;
2193
uint32_t listIndex = instructionAdjacentListIndices[instructionIndex];
2194
while (listIndex != UINT32_MAX) {
2195
const ListNode &listNode = listNodes[listIndex];
2196
instructionSortVector[listNode.instructionIndex].instructionLevel = std::max(instructionSortVector[listNode.instructionIndex].instructionLevel, nextLevel);
2197
listIndex = listNode.nextListIndex;
2198
}
2199
2200
instructionSortVector[instructionIndex].instructionIndex = instructionIndex;
2201
}
2202
2203
std::sort(instructionSortVector.begin(), instructionSortVector.end());
2204
2205
// Rebuild the instruction order vector with the sorted indices. If any of the instructions are pointers, store
2206
// them in a separate vector that will be used for another optimization pass.
2207
instructionOrder.clear();
2208
variableOrder.clear();
2209
for (InstructionSort &instructionSort : instructionSortVector) {
2210
instructionOrder.emplace_back(uint32_t(instructionSort.instructionIndex));
2211
2212
uint32_t wordIndex = instructions[instructionSort.instructionIndex].wordIndex;
2213
SpvOp opCode = SpvOp(dataWords[wordIndex] & 0xFFFFU);
2214
if (opCode == SpvOpVariable) {
2215
variableOrder.emplace_back(uint32_t(instructionSort.instructionIndex));
2216
}
2217
}
2218
2219
return true;
2220
}
2221
2222
bool Shader::parse(const void *pData, size_t pSize, bool pInlineFunctions) {
2223
assert(pData != nullptr);
2224
assert((pSize % sizeof(uint32_t) == 0) && "Size of data must be aligned to the word size.");
2225
2226
clear();
2227
2228
if (!checkData(pData, pSize)) {
2229
return false;
2230
}
2231
2232
extSpirvWords = reinterpret_cast<const uint32_t *>(pData);
2233
extSpirvWordCount = pSize / sizeof(uint32_t);
2234
2235
if (pInlineFunctions && !inlineData(pData, pSize)) {
2236
clear();
2237
return false;
2238
}
2239
2240
const void *data = pInlineFunctions ? inlinedSpirvWords.data() : pData;
2241
const size_t size = pInlineFunctions ? (inlinedSpirvWords.size() * sizeof(uint32_t)) : pSize;
2242
if (!parseData(data, size)) {
2243
clear();
2244
return false;
2245
}
2246
2247
if (!process(data, size)) {
2248
clear();
2249
return false;
2250
}
2251
2252
if (!sort(data, size)) {
2253
clear();
2254
return false;
2255
}
2256
2257
return true;
2258
}
2259
2260
bool Shader::empty() const {
2261
return inlinedSpirvWords.empty() && ((extSpirvWords == nullptr) || (extSpirvWordCount == 0));
2262
}
2263
2264
// Optimizer
2265
2266
// Outcome of evaluating a result ID: not evaluated yet, a known 32-bit constant, or a variable.
struct Resolution {
    enum Type {
        Unknown,
        Constant,
        Variable
    };

    Type type = Type::Unknown;

    // 32-bit payload, readable either as signed or as unsigned.
    struct {
        union {
            int32_t i32;
            uint32_t u32;
        };
    } value = {};

    // Booleans are stored as 1 (true) or 0 (false) in the unsigned view.
    static Resolution fromBool(bool pValue) {
        return fromUint32(pValue ? 1U : 0U);
    }

    // Constant resolution carrying a signed value.
    static Resolution fromInt32(int32_t pValue) {
        Resolution resolved;
        resolved.value.i32 = pValue;
        resolved.type = Type::Constant;
        return resolved;
    }

    // Constant resolution carrying an unsigned value.
    static Resolution fromUint32(uint32_t pValue) {
        Resolution resolved;
        resolved.value.u32 = pValue;
        resolved.type = Type::Constant;
        return resolved;
    }
};
2303
2304
struct OptimizerContext {
2305
const Shader &shader;
2306
std::vector<uint32_t> &instructionAdjacentListIndices;
2307
std::vector<uint32_t> &instructionInDegrees;
2308
std::vector<uint32_t> &instructionOutDegrees;
2309
std::vector<ListNode> &listNodes;
2310
std::vector<Resolution> &resolutions;
2311
std::vector<uint8_t> &optimizedData;
2312
Options options;
2313
2314
OptimizerContext() = delete;
2315
};
2316
2317
static void optimizerEliminateInstruction(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2318
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2319
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2320
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2321
for (uint32_t j = 0; j < wordCount; j++) {
2322
optimizedWords[wordIndex + j] = UINT32_MAX;
2323
}
2324
}
2325
2326
// Drains rResultStack: each popped result ID loses one outgoing connection. When an instruction without side
// effects reaches zero outgoing connections it is eliminated and its own operands are pushed onto the stack,
// so the reduction cascades through everything that is no longer used.
static void optimizerReduceResultDegrees(OptimizerContext &rContext, std::vector<uint32_t> &rResultStack) {
    const uint32_t *words = reinterpret_cast<const uint32_t *>(rContext.optimizedData.data());

    // Pushes every ID operand of the given instruction onto the result stack.
    auto pushOperands = [&](SpvOp pOpCode, uint32_t pWordIndex, uint32_t pWordCount) {
        uint32_t start, count, stride, skip;
        bool skipString;
        if (!SpvHasOperands(pOpCode, start, count, stride, skip, skipString, true)) {
            return;
        }

        uint32_t cursor = start;
        for (uint32_t k = 0; k < count; k++) {
            if (checkOperandWordSkip(pWordIndex, words, k, skip, skipString, cursor)) {
                continue;
            }

            if (cursor >= pWordCount) {
                break;
            }

            rResultStack.emplace_back(words[pWordIndex + cursor]);
            cursor += stride;
        }
    };

    while (!rResultStack.empty()) {
        const uint32_t resultId = rResultStack.back();
        rResultStack.pop_back();

        const uint32_t instructionIndex = rContext.shader.results[resultId].instructionIndex;
        const uint32_t wordIndex = rContext.shader.instructions[instructionIndex].wordIndex;

        // Instruction's been deleted.
        const uint32_t header = words[wordIndex];
        if (header == UINT32_MAX) {
            continue;
        }

        // Consider it's possible for a result to have no outgoing connections on an unoptimized shader.
        uint32_t &outDegree = rContext.instructionOutDegrees[instructionIndex];
        if (outDegree > 0) {
            outDegree--;
        }

        // Only instructions nothing uses anymore can be deleted. Function calls are excluded from this as it's
        // not easy to evaluate whether the function has side effects or not.
        const SpvOp opCode = SpvOp(header & 0xFFFFU);
        if ((outDegree != 0) || SpvHasSideEffects(opCode)) {
            continue;
        }

        // Push any operands it uses into the stack as well to reduce their out degrees.
        pushOperands(opCode, wordIndex, (header >> 16U) & 0xFFFFU);

        // Function parameters are excluded from being deleted as they'd break the function type definitions.
        // For being able to delete them, the original function type would have to be modified and only as long
        // as no other functions are reusing the same type definition.
        if (opCode != SpvOpFunctionParameter) {
            optimizerEliminateInstruction(instructionIndex, rContext);
        }

        if (opCode != SpvOpFunction) {
            continue;
        }

        // When a function is deleted, we just delete any instructions we can find until finding the function end.
        const uint32_t instructionCount = uint32_t(rContext.shader.instructions.size());
        for (uint32_t i = instructionIndex; i < instructionCount; i++) {
            const uint32_t bodyWordIndex = rContext.shader.instructions[i].wordIndex;
            const uint32_t bodyHeader = words[bodyWordIndex];
            if (bodyHeader == UINT32_MAX) {
                continue;
            }

            const SpvOp bodyOpCode = SpvOp(bodyHeader & 0xFFFFU);
            pushOperands(bodyOpCode, bodyWordIndex, (bodyHeader >> 16U) & 0xFFFFU);
            optimizerEliminateInstruction(i, rContext);
            if (bodyOpCode == SpvOpFunctionEnd) {
                break;
            }
        }
    }
}
2400
2401
// Initializes the optimizer's scratch buffers from the parsed shader: resets one resolution per result ID,
// copies the graph data (adjacency list heads, in/out degrees and list nodes) and copies the SPIR-V words
// that will be patched in place. The inlined words are used when present, the external words otherwise.
// Always returns true.
static bool optimizerPrepareData(OptimizerContext &rContext) {
    OptimizerContext &c = rContext;
    const Shader &shader = c.shader;

    c.resolutions.assign(shader.results.size(), Resolution());

    // assign() is used instead of resize() + memcpy() so that empty sources, whose data() may be null, are
    // handled without handing invalid pointers to memcpy.
    c.instructionAdjacentListIndices.assign(shader.instructionAdjacentListIndices.begin(), shader.instructionAdjacentListIndices.end());
    c.instructionInDegrees.assign(shader.instructionInDegrees.begin(), shader.instructionInDegrees.end());
    c.instructionOutDegrees.assign(shader.instructionOutDegrees.begin(), shader.instructionOutDegrees.end());
    c.listNodes.assign(shader.listNodes.begin(), shader.listNodes.end());

    const uint32_t *sourceWords = nullptr;
    size_t sourceWordCount = 0;
    if (shader.inlinedSpirvWords.empty()) {
        sourceWords = shader.extSpirvWords;
        sourceWordCount = shader.extSpirvWordCount;
    }
    else {
        sourceWords = shader.inlinedSpirvWords.data();
        sourceWordCount = shader.inlinedSpirvWords.size();
    }

    if ((sourceWords == nullptr) || (sourceWordCount == 0)) {
        c.optimizedData.clear();
    }
    else {
        const uint8_t *sourceBytes = reinterpret_cast<const uint8_t *>(sourceWords);
        c.optimizedData.assign(sourceBytes, sourceBytes + (sourceWordCount * sizeof(uint32_t)));
    }

    return true;
}
2425
2426
static bool optimizerPatchSpecializationConstants(const SpecConstant *pNewSpecConstants, uint32_t pNewSpecConstantCount, OptimizerContext &rContext) {
2427
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2428
for (uint32_t i = 0; i < pNewSpecConstantCount; i++) {
2429
const SpecConstant &newSpecConstant = pNewSpecConstants[i];
2430
if (newSpecConstant.specId >= rContext.shader.specializations.size()) {
2431
continue;
2432
}
2433
2434
const Specialization &specialization = rContext.shader.specializations[newSpecConstant.specId];
2435
if (specialization.constantInstructionIndex == UINT32_MAX) {
2436
continue;
2437
}
2438
2439
uint32_t constantWordIndex = rContext.shader.instructions[specialization.constantInstructionIndex].wordIndex;
2440
SpvOp constantOpCode = SpvOp(optimizedWords[constantWordIndex] & 0xFFFFU);
2441
uint32_t constantWordCount = (optimizedWords[constantWordIndex] >> 16U) & 0xFFFFU;
2442
switch (constantOpCode) {
2443
case SpvOpSpecConstantTrue:
2444
case SpvOpSpecConstantFalse:
2445
optimizedWords[constantWordIndex] = (newSpecConstant.values[0] ? SpvOpConstantTrue : SpvOpConstantFalse) | (constantWordCount << 16U);
2446
break;
2447
case SpvOpSpecConstant:
2448
if (constantWordCount <= 3) {
2449
fprintf(stderr, "Optimization error. Specialization constant has less words than expected.\n");
2450
return false;
2451
}
2452
2453
if (newSpecConstant.values.size() != (constantWordCount - 3)) {
2454
fprintf(stderr, "Optimization error. Value count for specialization constant %u differs from the expected size.\n", newSpecConstant.specId);
2455
return false;
2456
}
2457
2458
optimizedWords[constantWordIndex] = SpvOpConstant | (constantWordCount << 16U);
2459
memcpy(&optimizedWords[constantWordIndex + 3], newSpecConstant.values.data(), sizeof(uint32_t) * (constantWordCount - 3));
2460
break;
2461
default:
2462
fprintf(stderr, "Optimization error. Can't patch opCode %u.\n", constantOpCode);
2463
return false;
2464
}
2465
2466
// Eliminate the decorator instruction as well.
2467
optimizerEliminateInstruction(specialization.decorationInstructionIndex, rContext);
2468
}
2469
2470
return true;
2471
}
2472
2473
static void optimizerEvaluateResult(uint32_t pResultId, OptimizerContext &rContext) {
2474
const uint32_t *optimizedWords = reinterpret_cast<const uint32_t *>(rContext.optimizedData.data());
2475
const Result &result = rContext.shader.results[pResultId];
2476
Resolution &resolution = rContext.resolutions[pResultId];
2477
uint32_t resultWordIndex = rContext.shader.instructions[result.instructionIndex].wordIndex;
2478
SpvOp opCode = SpvOp(optimizedWords[resultWordIndex] & 0xFFFFU);
2479
uint32_t wordCount = (optimizedWords[resultWordIndex] >> 16U) & 0xFFFFU;
2480
switch (opCode) {
2481
case SpvOpConstant: {
2482
// Parse the known type of constants. Any other types will be considered as variable.
2483
const Result &typeResult = rContext.shader.results[optimizedWords[resultWordIndex + 1]];
2484
uint32_t typeWordIndex = rContext.shader.instructions[typeResult.instructionIndex].wordIndex;
2485
SpvOp typeOpCode = SpvOp(optimizedWords[typeWordIndex] & 0xFFFFU);
2486
uint32_t typeWidthInBits = optimizedWords[typeWordIndex + 2];
2487
uint32_t typeSigned = optimizedWords[typeWordIndex + 3];
2488
if ((typeOpCode == SpvOpTypeInt) && (typeWidthInBits == 32)) {
2489
if (typeSigned) {
2490
resolution = Resolution::fromInt32(int32_t(optimizedWords[resultWordIndex + 3]));
2491
}
2492
else {
2493
resolution = Resolution::fromUint32(optimizedWords[resultWordIndex + 3]);
2494
}
2495
}
2496
else {
2497
resolution.type = Resolution::Type::Variable;
2498
}
2499
2500
break;
2501
}
2502
case SpvOpConstantTrue:
2503
resolution = Resolution::fromBool(true);
2504
break;
2505
case SpvOpConstantFalse:
2506
resolution = Resolution::fromBool(false);
2507
break;
2508
case SpvOpBitcast: {
2509
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2510
resolution = Resolution::fromUint32(operandResolution.value.u32);
2511
break;
2512
}
2513
case SpvOpIAdd: {
2514
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2515
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2516
resolution = Resolution::fromUint32(firstResolution.value.u32 + secondResolution.value.u32);
2517
break;
2518
}
2519
case SpvOpISub: {
2520
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2521
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2522
resolution = Resolution::fromUint32(firstResolution.value.u32 - secondResolution.value.u32);
2523
break;
2524
}
2525
case SpvOpIMul: {
2526
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2527
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2528
resolution = Resolution::fromUint32(firstResolution.value.u32 * secondResolution.value.u32);
2529
break;
2530
}
2531
case SpvOpUDiv: {
2532
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2533
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2534
resolution = Resolution::fromUint32(firstResolution.value.u32 / secondResolution.value.u32);
2535
break;
2536
}
2537
case SpvOpSDiv: {
2538
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2539
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2540
resolution = Resolution::fromUint32(firstResolution.value.i32 / secondResolution.value.i32);
2541
break;
2542
}
2543
case SpvOpLogicalEqual: {
2544
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2545
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2546
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) == (secondResolution.value.u32 != 0));
2547
break;
2548
}
2549
case SpvOpLogicalNotEqual: {
2550
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2551
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2552
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) != (secondResolution.value.u32 != 0));
2553
break;
2554
}
2555
case SpvOpLogicalOr: {
2556
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2557
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2558
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) || (secondResolution.value.u32 != 0));
2559
break;
2560
}
2561
case SpvOpLogicalAnd: {
2562
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2563
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2564
resolution = Resolution::fromBool((firstResolution.value.u32 != 0) && (secondResolution.value.u32 != 0));
2565
break;
2566
}
2567
case SpvOpLogicalNot: {
2568
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2569
resolution = Resolution::fromBool(operandResolution.value.u32 == 0);
2570
break;
2571
}
2572
case SpvOpSelect: {
2573
const Resolution &conditionResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2574
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2575
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 5]];
2576
resolution = (conditionResolution.value.u32 != 0) ? firstResolution : secondResolution;
2577
break;
2578
}
2579
case SpvOpIEqual: {
2580
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2581
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2582
resolution = Resolution::fromBool(firstResolution.value.u32 == secondResolution.value.u32);
2583
break;
2584
}
2585
case SpvOpINotEqual: {
2586
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2587
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2588
resolution = Resolution::fromBool(firstResolution.value.u32 != secondResolution.value.u32);
2589
break;
2590
}
2591
case SpvOpUGreaterThan: {
2592
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2593
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2594
resolution = Resolution::fromBool(firstResolution.value.u32 > secondResolution.value.u32);
2595
break;
2596
}
2597
case SpvOpSGreaterThan: {
2598
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2599
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2600
resolution = Resolution::fromBool(firstResolution.value.i32 > secondResolution.value.i32);
2601
break;
2602
}
2603
case SpvOpUGreaterThanEqual: {
2604
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2605
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2606
resolution = Resolution::fromBool(firstResolution.value.u32 >= secondResolution.value.u32);
2607
break;
2608
}
2609
case SpvOpSGreaterThanEqual: {
2610
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2611
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2612
resolution = Resolution::fromBool(firstResolution.value.i32 >= secondResolution.value.i32);
2613
break;
2614
}
2615
case SpvOpULessThan: {
2616
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2617
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2618
resolution = Resolution::fromBool(firstResolution.value.u32 < secondResolution.value.u32);
2619
break;
2620
}
2621
case SpvOpSLessThan: {
2622
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2623
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2624
resolution = Resolution::fromBool(firstResolution.value.i32 < secondResolution.value.i32);
2625
break;
2626
}
2627
case SpvOpULessThanEqual: {
2628
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2629
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2630
resolution = Resolution::fromBool(firstResolution.value.u32 <= secondResolution.value.u32);
2631
break;
2632
}
2633
case SpvOpSLessThanEqual: {
2634
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2635
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2636
resolution = Resolution::fromBool(firstResolution.value.i32 <= secondResolution.value.i32);
2637
break;
2638
}
2639
case SpvOpShiftRightLogical: {
2640
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2641
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2642
resolution = Resolution::fromUint32(baseResolution.value.u32 >> shiftResolution.value.u32);
2643
break;
2644
}
2645
case SpvOpShiftRightArithmetic: {
2646
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2647
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2648
resolution = Resolution::fromInt32(baseResolution.value.i32 >> shiftResolution.value.i32);
2649
break;
2650
}
2651
case SpvOpShiftLeftLogical: {
2652
const Resolution &baseResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2653
const Resolution &shiftResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2654
resolution = Resolution::fromUint32(baseResolution.value.u32 << shiftResolution.value.u32);
2655
break;
2656
}
2657
case SpvOpBitwiseOr: {
2658
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2659
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2660
resolution = Resolution::fromUint32(firstResolution.value.u32 | secondResolution.value.u32);
2661
break;
2662
}
2663
case SpvOpBitwiseAnd: {
2664
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2665
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2666
resolution = Resolution::fromUint32(firstResolution.value.u32 & secondResolution.value.u32);
2667
break;
2668
}
2669
case SpvOpBitwiseXor: {
2670
const Resolution &firstResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2671
const Resolution &secondResolution = rContext.resolutions[optimizedWords[resultWordIndex + 4]];
2672
resolution = Resolution::fromUint32(firstResolution.value.u32 ^ secondResolution.value.u32);
2673
break;
2674
}
2675
case SpvOpNot: {
2676
const Resolution &operandResolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2677
resolution = Resolution::fromUint32(~operandResolution.value.u32);
2678
break;
2679
}
2680
case SpvOpPhi: {
2681
// Resolve as constant if Phi operator was compacted to only one option.
2682
if (wordCount == 5) {
2683
resolution = rContext.resolutions[optimizedWords[resultWordIndex + 3]];
2684
}
2685
else {
2686
resolution.type = Resolution::Type::Variable;
2687
}
2688
2689
break;
2690
}
2691
default:
2692
// It's not known how to evaluate the instruction, consider the result a variable.
2693
resolution.type = Resolution::Type::Variable;
2694
break;
2695
}
2696
}
2697
2698
static void optimizerReduceLabelDegree(uint32_t pFirstLabelId, OptimizerContext &rContext) {
2699
thread_local std::vector<uint32_t> labelStack;
2700
thread_local std::vector<uint32_t> resultStack;
2701
thread_local std::vector<uint32_t> degreeReductions;
2702
labelStack.emplace_back(pFirstLabelId);
2703
resultStack.clear();
2704
degreeReductions.clear();
2705
2706
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2707
while (!labelStack.empty()) {
2708
uint32_t labelId = labelStack.back();
2709
labelStack.pop_back();
2710
2711
uint32_t instructionIndex = rContext.shader.results[labelId].instructionIndex;
2712
if (rContext.instructionInDegrees[instructionIndex] == 0) {
2713
continue;
2714
}
2715
2716
rContext.instructionInDegrees[instructionIndex]--;
2717
2718
// If a label's degree becomes 0, eliminate all the instructions of the block.
2719
// Eliminate as many instructions as possible until finding the terminator of the block.
2720
// When finding the terminator, look at the labels it has and push them to the stack to
2721
// reduce their degrees as well.
2722
if (rContext.instructionInDegrees[instructionIndex] == 0) {
2723
bool foundTerminator = false;
2724
uint32_t instructionCount = rContext.shader.instructions.size();
2725
for (uint32_t i = instructionIndex; (i < instructionCount) && !foundTerminator; i++) {
2726
uint32_t wordIndex = rContext.shader.instructions[i].wordIndex;
2727
if (optimizedWords[wordIndex] == UINT32_MAX) {
2728
continue;
2729
}
2730
2731
// If the instruction has labels it can reference, we push the labels to reduce their degrees as well.
2732
SpvOp opCode = SpvOp(optimizedWords[wordIndex] & 0xFFFFU);
2733
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2734
uint32_t labelWordStart, labelWordCount, labelWordStride;
2735
if (SpvHasLabels(opCode, labelWordStart, labelWordCount, labelWordStride, false)) {
2736
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < wordCount); j++) {
2737
uint32_t terminatorLabelId = optimizedWords[wordIndex + labelWordStart + j * labelWordStride];
2738
labelStack.emplace_back(terminatorLabelId);
2739
}
2740
}
2741
2742
// If the instruction has operands, decrease their degree.
2743
uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
2744
bool operandWordSkipString;
2745
if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
2746
uint32_t operandWordIndex = operandWordStart;
2747
for (uint32_t j = 0; j < operandWordCount; j++) {
2748
if (checkOperandWordSkip(wordIndex, optimizedWords, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
2749
continue;
2750
}
2751
2752
if (operandWordIndex >= wordCount) {
2753
break;
2754
}
2755
2756
uint32_t operandId = optimizedWords[wordIndex + operandWordIndex];
2757
resultStack.emplace_back(operandId);
2758
operandWordIndex += operandWordStride;
2759
}
2760
}
2761
2762
foundTerminator = SpvOpIsTerminator(opCode);
2763
optimizerEliminateInstruction(i, rContext);
2764
}
2765
}
2766
}
2767
2768
optimizerReduceResultDegrees(rContext, resultStack);
2769
}
2770
2771
static void optimizerEvaluateTerminator(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2772
// For each type of supported terminator, check if the operands can be resolved into constants.
2773
// If they can be resolved, eliminate any other branches that don't pass the condition.
2774
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2775
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2776
SpvOp opCode = SpvOp(optimizedWords[wordIndex] & 0xFFFFU);
2777
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2778
uint32_t defaultLabelId = UINT32_MAX;
2779
2780
// Both instructions share that the second word is the operator they must use to resolve the condition.
2781
// Operator can't be anything but a constant to be able to resolve a terminator.
2782
const uint32_t operatorId = optimizedWords[wordIndex + 1];
2783
const Resolution &operatorResolution = rContext.resolutions[operatorId];
2784
if (operatorResolution.type != Resolution::Type::Constant) {
2785
return;
2786
}
2787
2788
if (opCode == SpvOpBranchConditional) {
2789
// Branch conditional only needs to choose either label depending on whether the result is true or false.
2790
if (operatorResolution.value.u32) {
2791
defaultLabelId = optimizedWords[wordIndex + 2];
2792
optimizerReduceLabelDegree(optimizedWords[wordIndex + 3], rContext);
2793
}
2794
else {
2795
defaultLabelId = optimizedWords[wordIndex + 3];
2796
optimizerReduceLabelDegree(optimizedWords[wordIndex + 2], rContext);
2797
}
2798
2799
// If there's a selection merge before this branch, we place the unconditional branch in its place.
2800
const uint32_t mergeWordCount = 3;
2801
uint32_t mergeWordIndex = wordIndex - mergeWordCount;
2802
SpvOp mergeOpCode = SpvOp(optimizedWords[mergeWordIndex] & 0xFFFFU);
2803
2804
uint32_t patchWordIndex;
2805
if (mergeOpCode == SpvOpSelectionMerge) {
2806
optimizerReduceLabelDegree(optimizedWords[mergeWordIndex + 1], rContext);
2807
patchWordIndex = mergeWordIndex;
2808
}
2809
else {
2810
patchWordIndex = wordIndex;
2811
}
2812
2813
// Make the final label the new default case and reduce the word count.
2814
optimizedWords[patchWordIndex] = SpvOpBranch | (2U << 16U);
2815
optimizedWords[patchWordIndex + 1] = defaultLabelId;
2816
2817
// Eliminate any remaining words on the block.
2818
for (uint32_t i = patchWordIndex + 2; i < (wordIndex + wordCount); i++) {
2819
optimizedWords[i] = UINT32_MAX;
2820
}
2821
}
2822
else if (opCode == SpvOpSwitch) {
2823
// Switch must compare the integer result of the operator to all the possible labels.
2824
// If the label is not as possible result, then reduce its block's degree.
2825
for (uint32_t i = 3; i < wordCount; i += 2) {
2826
if (operatorResolution.value.u32 == optimizedWords[wordIndex + i]) {
2827
defaultLabelId = optimizedWords[wordIndex + i + 1];
2828
}
2829
else {
2830
optimizerReduceLabelDegree(optimizedWords[wordIndex + i + 1], rContext);
2831
}
2832
}
2833
2834
// If none are chosen, the default label is selected. Otherwise, reduce the block's degree
2835
// for the default label.
2836
if (defaultLabelId == UINT32_MAX) {
2837
defaultLabelId = optimizedWords[wordIndex + 2];
2838
}
2839
else {
2840
optimizerReduceLabelDegree(optimizedWords[wordIndex + 2], rContext);
2841
}
2842
2843
// Make the final label the new default case and reduce the word count.
2844
optimizedWords[wordIndex] = SpvOpSwitch | (3U << 16U);
2845
optimizedWords[wordIndex + 1] = rContext.shader.defaultSwitchOpConstantInt;
2846
optimizedWords[wordIndex + 2] = defaultLabelId;
2847
2848
// Increase the degree of the default constant that was chosen so it's not considered as dead code.
2849
uint32_t defaultConstantInstructionIndex = rContext.shader.results[rContext.shader.defaultSwitchOpConstantInt].instructionIndex;
2850
rContext.instructionOutDegrees[defaultConstantInstructionIndex]++;
2851
2852
// Eliminate any remaining words on the block.
2853
for (uint32_t i = wordIndex + 3; i < (wordIndex + wordCount); i++) {
2854
optimizedWords[i] = UINT32_MAX;
2855
}
2856
}
2857
2858
// The condition operator can be discarded.
2859
thread_local std::vector<uint32_t> resultStack;
2860
resultStack.clear();
2861
resultStack.emplace_back(operatorId);
2862
optimizerReduceResultDegrees(rContext, resultStack);
2863
}
2864
2865
static bool optimizerCompactPhi(uint32_t pInstructionIndex, OptimizerContext &rContext) {
2866
// Do a backwards search first to find out what label this instruction belongs to.
2867
uint32_t *optimizedWords = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
2868
uint32_t searchInstructionIndex = pInstructionIndex;
2869
uint32_t instructionLabelId = UINT32_MAX;
2870
while (searchInstructionIndex > 0) {
2871
uint32_t searchWordIndex = rContext.shader.instructions[searchInstructionIndex].wordIndex;
2872
SpvOp searchOpCode = SpvOp(optimizedWords[searchWordIndex] & 0xFFFFU);
2873
if (searchOpCode == SpvOpLabel) {
2874
instructionLabelId = optimizedWords[searchWordIndex + 1];
2875
break;
2876
}
2877
2878
searchInstructionIndex--;
2879
}
2880
2881
if (instructionLabelId == UINT32_MAX) {
2882
fprintf(stderr, "Unable to find a label before OpPhi.\n");
2883
return false;
2884
}
2885
2886
thread_local std::vector<uint32_t> resultStack;
2887
resultStack.clear();
2888
2889
uint32_t wordIndex = rContext.shader.instructions[pInstructionIndex].wordIndex;
2890
uint32_t wordCount = (optimizedWords[wordIndex] >> 16U) & 0xFFFFU;
2891
uint32_t newWordCount = 3;
2892
uint32_t instructionCount = rContext.shader.instructions.size();
2893
for (uint32_t i = 3; i < wordCount; i += 2) {
2894
uint32_t labelId = optimizedWords[wordIndex + i + 1];
2895
uint32_t labelInstructionIndex = rContext.shader.results[labelId].instructionIndex;
2896
uint32_t labelWordIndex = rContext.shader.instructions[labelInstructionIndex].wordIndex;
2897
2898
// Label's been eliminated. Skip it.
2899
if (optimizedWords[labelWordIndex] == UINT32_MAX) {
2900
resultStack.emplace_back(optimizedWords[wordIndex + i]);
2901
continue;
2902
}
2903
2904
// While the label may not have been eliminated, verify its terminator is still pointing to this block.
2905
bool foundBranchToThisBlock = false;
2906
for (uint32_t j = labelInstructionIndex; j < instructionCount; j++) {
2907
uint32_t searchWordIndex = rContext.shader.instructions[j].wordIndex;
2908
SpvOp searchOpCode = SpvOp(optimizedWords[searchWordIndex] & 0xFFFFU);
2909
uint32_t searchWordCount = (optimizedWords[searchWordIndex] >> 16U) & 0xFFFFU;
2910
if (SpvOpIsTerminator(searchOpCode)) {
2911
uint32_t labelWordStart, labelWordCount, labelWordStride;
2912
if (SpvHasLabels(searchOpCode, labelWordStart, labelWordCount, labelWordStride, false)) {
2913
for (uint32_t j = 0; (j < labelWordCount) && ((labelWordStart + j * labelWordStride) < searchWordCount); j++) {
2914
uint32_t searchLabelId = optimizedWords[searchWordIndex + labelWordStart + j * labelWordStride];
2915
if (searchLabelId == instructionLabelId) {
2916
foundBranchToThisBlock = true;
2917
break;
2918
}
2919
}
2920
}
2921
2922
break;
2923
}
2924
}
2925
2926
// The preceding block did not have any reference to this block. Skip it.
2927
if (!foundBranchToThisBlock) {
2928
resultStack.emplace_back(optimizedWords[wordIndex + i]);
2929
continue;
2930
}
2931
2932
// Copy the words.
2933
optimizedWords[wordIndex + newWordCount + 0] = optimizedWords[wordIndex + i + 0];
2934
optimizedWords[wordIndex + newWordCount + 1] = optimizedWords[wordIndex + i + 1];
2935
newWordCount += 2;
2936
}
2937
2938
// Patch in the new word count.
2939
assert((optimizedWords[wordIndex] != UINT32_MAX) && "The instruction shouldn't be getting deleted from reducing the degree of the operands.");
2940
optimizedWords[wordIndex] = SpvOpPhi | (newWordCount << 16U);
2941
2942
// Delete any of the remaining words.
2943
for (uint32_t i = newWordCount; i < wordCount; i++) {
2944
optimizedWords[wordIndex + i] = UINT32_MAX;
2945
}
2946
2947
optimizerReduceResultDegrees(rContext, resultStack);
2948
2949
return true;
2950
}
2951
2952
// Walks the instructions in shader.instructionOrder and, for each one that hasn't been deleted:
// - queues results with an out-degree of zero (other than OpLabel and OpFunctionCall) for degree reduction,
// - compacts OpPhi instructions,
// - evaluates results whose operands are all non-variable, or marks the result as a variable otherwise,
// - evaluates OpBranchConditional and OpSwitch terminators.
// Does nothing when dead code removal is disabled. Returns false if an OpPhi can't be compacted or if
// an operand of a non-OpPhi instruction has an unknown resolution.
static bool optimizerRunEvaluationPass(OptimizerContext &rContext) {
	if (!rContext.options.removeDeadCode) {
		return true;
	}

	thread_local std::vector<uint32_t> resultStack;
	resultStack.clear();

	uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
	const uint32_t orderCount = uint32_t(rContext.shader.instructionOrder.size());
	for (uint32_t orderIndex = 0; orderIndex < orderCount; orderIndex++) {
		const uint32_t instructionIndex = rContext.shader.instructionOrder[orderIndex];
		const uint32_t wordIndex = rContext.shader.instructions[instructionIndex].wordIndex;

		// Instruction has been deleted.
		if (words[wordIndex] == UINT32_MAX) {
			continue;
		}

		const SpvOp opCode = SpvOp(words[wordIndex] & 0xFFFFU);
		uint32_t liveWordCount = (words[wordIndex] >> 16U) & 0xFFFFU;
		bool hasResult, hasType;
		SpvHasResultAndType(opCode, &hasResult, &hasType);

		if (!hasResult) {
			// Among the instructions without a result, only conditional terminators are evaluated.
			if ((opCode == SpvOpBranchConditional) || (opCode == SpvOpSwitch)) {
				optimizerEvaluateTerminator(instructionIndex, rContext);
			}

			continue;
		}

		const uint32_t resultId = words[wordIndex + (hasType ? 2 : 1)];
		const bool exemptFromRemoval = (opCode == SpvOpLabel) || (opCode == SpvOpFunctionCall);
		if (!exemptFromRemoval && (rContext.instructionOutDegrees[instructionIndex] == 0)) {
			// Nothing references this result, queue it for degree reduction.
			resultStack.emplace_back(resultId);
			continue;
		}

		if (opCode == SpvOpPhi) {
			if (!optimizerCompactPhi(instructionIndex, rContext)) {
				return false;
			}

			// Compaction may have shortened the instruction.
			liveWordCount = (words[wordIndex] >> 16U) & 0xFFFFU;
		}

		// Check if any of the operands isn't a constant.
		bool allOperandsAreConstant = true;
		uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
		bool operandWordSkipString;
		if (SpvHasOperands(opCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
			uint32_t operandWordIndex = operandWordStart;
			for (uint32_t j = 0; j < operandWordCount; j++) {
				if (checkOperandWordSkip(wordIndex, words, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
					continue;
				}

				if (operandWordIndex >= liveWordCount) {
					break;
				}

				const uint32_t operandId = words[wordIndex + operandWordIndex];
				assert((operandId != UINT32_MAX) && "An operand that's been deleted shouldn't be getting evaluated.");

				// It shouldn't be possible for an operand to not be solved, but OpPhi can do so because previous blocks might've been deleted.
				const Resolution::Type operandType = rContext.resolutions[operandId].type;
				if ((opCode != SpvOpPhi) && (operandType == Resolution::Type::Unknown)) {
					fprintf(stderr, "Error in resolution of the operations. Operand %u was not solved.\n", operandId);
					return false;
				}

				if (operandType == Resolution::Type::Variable) {
					allOperandsAreConstant = false;
					break;
				}

				operandWordIndex += operandWordStride;
			}
		}

		// The result can only be evaluated if all operands are constant.
		if (allOperandsAreConstant) {
			optimizerEvaluateResult(resultId, rContext);
		}
		else {
			rContext.resolutions[resultId].type = Resolution::Type::Variable;
		}
	}

	optimizerReduceResultDegrees(rContext, resultStack);

	return true;
}
3043
3044
static bool optimizerDoesInstructionDominate(const Shader &pShader, const Instruction &pInstructionA, const Instruction &pInstructionB) {
3045
// If on the same block, the instruction will only dominate the other one if it precedes it.
3046
if (pInstructionA.blockIndex == pInstructionB.blockIndex) {
3047
return pInstructionA.wordIndex < pInstructionB.wordIndex;
3048
}
3049
// If the blocks are different, compare the indices of the pre-order and post-order traversal
3050
// to determine whether it dominates the other block.
3051
else {
3052
const uint32_t aPreIndex = pShader.blockPreOrderIndices[pInstructionA.blockIndex];
3053
const uint32_t bPreIndex = pShader.blockPreOrderIndices[pInstructionB.blockIndex];
3054
const uint32_t aPostIndex = pShader.blockPostOrderIndices[pInstructionA.blockIndex];
3055
const uint32_t bPostIndex = pShader.blockPostOrderIndices[pInstructionB.blockIndex];
3056
return (aPreIndex < bPreIndex) && (aPostIndex > bPostIndex);
3057
}
3058
}
3059
3060
// Processes the variables in shader.variableOrder from last to first, looking only at the ones with
// Function storage class that haven't been deleted. For each variable, the instructions that use it
// (directly or through OpAccessChain) are sorted into stores, full loads and partial loads; any other
// kind of user aborts the analysis of that variable. Then:
// - if there's exactly one store and it targets the whole variable, the uses of every full load
//   dominated by the store are rewritten to use the stored value directly,
// - if all full loads were rewritten and there are no partial loads, the stores are eliminated.
// Does nothing when dead code removal is disabled. Always returns true.
static bool optimizerRemoveUnusedVariables(OptimizerContext &rContext) {
	if (!rContext.options.removeDeadCode) {
		return true;
	}

	// Scratch buffers, reset for every variable that gets analyzed.
	thread_local std::vector<uint32_t> resultStack;
	thread_local std::vector<uint32_t> accessStack;
	thread_local std::vector<uint32_t> storeInstructionIndices;
	thread_local std::vector<uint32_t> partialLoadInstructionIndices;
	thread_local std::vector<uint32_t> fullLoadInstructionIndices;

	uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
	const int32_t variableCount = int32_t(rContext.shader.variableOrder.size());
	for (int32_t orderIndex = variableCount - 1; orderIndex >= 0; orderIndex--) {
		const uint32_t variableInstructionIndex = rContext.shader.variableOrder[orderIndex];
		const uint32_t variableWordIndex = rContext.shader.instructions[variableInstructionIndex].wordIndex;
		const uint32_t variableId = words[variableWordIndex + 2];
		if (variableId == UINT32_MAX) {
			// This variable has already been deleted.
			continue;
		}

		if (SpvStorageClass(words[variableWordIndex + 3]) != SpvStorageClassFunction) {
			// Only evaluate variables local to the function.
			continue;
		}

		bool everyStoreIsFull = true;
		resultStack.clear();
		accessStack.clear();
		storeInstructionIndices.clear();
		partialLoadInstructionIndices.clear();
		fullLoadInstructionIndices.clear();

		// Classify every user of the variable, following access chains.
		accessStack.emplace_back(variableInstructionIndex);
		while (!accessStack.empty()) {
			const uint32_t pointerInstructionIndex = accessStack.back();
			accessStack.pop_back();

			if (rContext.instructionOutDegrees[pointerInstructionIndex] == 0) {
				resultStack.emplace_back(variableId);
				continue;
			}

			uint32_t listIndex = rContext.instructionAdjacentListIndices[pointerInstructionIndex];
			while (listIndex != UINT32_MAX) {
				const uint32_t userInstructionIndex = rContext.listNodes[listIndex].instructionIndex;
				const uint32_t userWordIndex = rContext.shader.instructions[userInstructionIndex].wordIndex;
				listIndex = rContext.listNodes[listIndex].nextListIndex;

				// Only check the instruction if it hasn't been deleted yet.
				if (words[userWordIndex] == UINT32_MAX) {
					continue;
				}

				switch (SpvOp(words[userWordIndex] & 0xFFFFU)) {
					case SpvOpAccessChain:
						accessStack.emplace_back(userInstructionIndex);
						break;
					case SpvOpStore:
						// A store is full only if its pointer operand is the variable itself.
						storeInstructionIndices.emplace_back(userInstructionIndex);
						everyStoreIsFull = everyStoreIsFull && (words[userWordIndex + 1] == variableId);
						break;
					case SpvOpLoad:
						// A load is full only if its pointer operand is the variable itself.
						if (words[userWordIndex + 3] == variableId) {
							fullLoadInstructionIndices.emplace_back(userInstructionIndex);
						}
						else {
							partialLoadInstructionIndices.emplace_back(userInstructionIndex);
						}
						break;
					default:
						// The whole search process is stopped if anything in the access chain is not recognized.
						accessStack.clear();
						storeInstructionIndices.clear();
						fullLoadInstructionIndices.clear();
						partialLoadInstructionIndices.clear();
						listIndex = UINT32_MAX;
						break;
				}
			}
		}

		// Single store load elimination. Any variables that are only stored to once can eliminate any loads
		// and remap the results of the adjacent instructions. However, a strict requirement is that the block
		// that holds the store must dominate the block that holds the load as per SPIR-V rules.
		size_t forwardedLoadCount = 0;
		const bool hasSingleFullStore = !fullLoadInstructionIndices.empty() && (storeInstructionIndices.size() == 1) && everyStoreIsFull;
		if (hasSingleFullStore) {
			const uint32_t singleStoreInstructionIndex = storeInstructionIndices.front();
			const Instruction &storeInstruction = rContext.shader.instructions[singleStoreInstructionIndex];
			if (words[storeInstruction.wordIndex] != UINT32_MAX) {
				const uint32_t storedValueId = words[storeInstruction.wordIndex + 2];
				const uint32_t storedValueInstructionIndex = rContext.shader.results[storedValueId].instructionIndex;
				for (uint32_t loadInstructionIndex : fullLoadInstructionIndices) {
					const Instruction &loadInstruction = rContext.shader.instructions[loadInstructionIndex];
					const uint32_t loadWordIndex = loadInstruction.wordIndex;
					if (words[loadWordIndex] == UINT32_MAX) {
						// Instruction has been deleted already.
						continue;
					}

					if (!optimizerDoesInstructionDominate(rContext.shader, storeInstruction, loadInstruction)) {
						// Store's block must dominate the load's block for the elimination to be possible.
						continue;
					}

					// Rewrite every use of the load's result to reference the stored value instead.
					const uint32_t loadResultId = words[loadWordIndex + 2];
					for (uint32_t listIndex = rContext.instructionAdjacentListIndices[loadInstructionIndex]; listIndex != UINT32_MAX; listIndex = rContext.listNodes[listIndex].nextListIndex) {
						const uint32_t userInstructionIndex = rContext.listNodes[listIndex].instructionIndex;
						const uint32_t userWordIndex = rContext.shader.instructions[userInstructionIndex].wordIndex;
						if (words[userWordIndex] == UINT32_MAX) {
							continue;
						}

						const SpvOp userOpCode = SpvOp(words[userWordIndex] & 0xFFFFU);
						const uint32_t userWordCount = (words[userWordIndex] >> 16U) & 0xFFFFU;
						uint32_t operandWordStart, operandWordCount, operandWordStride, operandWordSkip;
						bool operandWordSkipString;
						if (!SpvHasOperands(userOpCode, operandWordStart, operandWordCount, operandWordStride, operandWordSkip, operandWordSkipString, true)) {
							continue;
						}

						uint32_t operandWordIndex = operandWordStart;
						for (uint32_t j = 0; j < operandWordCount; j++) {
							if (checkOperandWordSkip(userWordIndex, words, j, operandWordSkip, operandWordSkipString, operandWordIndex)) {
								continue;
							}

							if (operandWordIndex >= userWordCount) {
								break;
							}

							uint32_t &operandId = words[userWordIndex + operandWordIndex];
							if (operandId == loadResultId) {
								// The user now references the stored value: the load loses a reference
								// and the stored value gains both the reference and the adjacency.
								operandId = storedValueId;
								resultStack.emplace_back(loadResultId);
								rContext.instructionAdjacentListIndices[storedValueInstructionIndex] = addToList(userInstructionIndex, rContext.instructionAdjacentListIndices[storedValueInstructionIndex], rContext.listNodes);
								rContext.instructionOutDegrees[storedValueInstructionIndex]++;
							}

							operandWordIndex += operandWordStride;
						}
					}

					forwardedLoadCount++;
				}
			}
		}

		const bool noLoadsRemain = (fullLoadInstructionIndices.size() == forwardedLoadCount) && partialLoadInstructionIndices.empty();
		if (noLoadsRemain) {
			// Unused store elimination. Any variables which have no loads but have stores can be eliminated.
			for (uint32_t storeInstructionIndex : storeInstructionIndices) {
				const uint32_t storeWordIndex = rContext.shader.instructions[storeInstructionIndex].wordIndex;
				if (words[storeWordIndex] == UINT32_MAX) {
					// Instruction has been deleted already.
					continue;
				}

				// Both operands of the store lose a reference.
				resultStack.emplace_back(words[storeWordIndex + 1]);
				resultStack.emplace_back(words[storeWordIndex + 2]);
				optimizerEliminateInstruction(storeInstructionIndex, rContext);
			}
		}

		optimizerReduceResultDegrees(rContext, resultStack);
	}

	return true;
}
3226
3227
// Eliminates every decoration whose target result has been deleted.
// Does nothing when dead code removal is disabled. Always returns true.
static bool optimizerRemoveUnusedDecorations(OptimizerContext &rContext) {
	if (!rContext.options.removeDeadCode) {
		return true;
	}

	uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
	for (const Decoration &decoration : rContext.shader.decorations) {
		const uint32_t decorationWordIndex = rContext.shader.instructions[decoration.instructionIndex].wordIndex;
		const uint32_t targetId = words[decorationWordIndex + 1];
		if (targetId == UINT32_MAX) {
			// This decoration has already been deleted.
			continue;
		}

		// The decoration goes away together with the result it decorates.
		const uint32_t targetInstructionIndex = rContext.shader.results[targetId].instructionIndex;
		const uint32_t targetWordIndex = rContext.shader.instructions[targetInstructionIndex].wordIndex;
		if (words[targetWordIndex] == UINT32_MAX) {
			optimizerEliminateInstruction(decoration.instructionIndex, rContext);
		}
	}

	return true;
}
3252
3253
// Runs optimizerCompactPhi() on every OpPhi of the shader that hasn't been deleted.
// Does nothing when dead code removal is disabled. Returns false as soon as one compaction fails.
static bool optimizerCompactPhis(OptimizerContext &rContext) {
	if (!rContext.options.removeDeadCode) {
		return true;
	}

	uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());
	for (const Phi &phi : rContext.shader.phis) {
		const uint32_t phiWordIndex = rContext.shader.instructions[phi.instructionIndex].wordIndex;

		// Operations that have already been deleted are skipped.
		const bool stillPresent = words[phiWordIndex] != UINT32_MAX;
		if (stillPresent && !optimizerCompactPhi(phi.instructionIndex, rContext)) {
			return false;
		}
	}

	return true;
}
3273
3274
// Packs the optimized words in place: the five header words stay where they are and every surviving
// instruction is moved down to close the gaps left by deleted instructions (first word equal to
// UINT32_MAX) and, when dead code removal is enabled, by ignored opcodes. The buffer is then shrunk to
// the packed size. Always returns true.
static bool optimizerCompactData(OptimizerContext &rContext) {
	uint32_t *words = reinterpret_cast<uint32_t *>(rContext.optimizedData.data());

	// The header is already at the start of the buffer, so writing begins right after it.
	const uint32_t headerWordCount = 5;
	uint32_t writeIndex = headerWordCount;

	// Write out all the words for all the instructions and skip any that were marked as deleted.
	for (const auto &instruction : rContext.shader.instructions) {
		const uint32_t readIndex = instruction.wordIndex;
		const uint32_t headWord = words[readIndex];

		// Instruction has been deleted.
		if (headWord == UINT32_MAX) {
			continue;
		}

		// Check if the instruction should be ignored.
		if (rContext.options.removeDeadCode && SpvIsIgnored(SpvOp(headWord & 0xFFFFU))) {
			continue;
		}

		// Copy all the words of the instruction. The write position never gets ahead of the read position.
		const uint32_t *readCursor = words + readIndex;
		for (uint32_t remaining = (headWord >> 16U) & 0xFFFFU; remaining > 0; remaining--) {
			words[writeIndex++] = *readCursor++;
		}
	}

	rContext.optimizedData.resize(writeIndex * sizeof(uint32_t));

	return true;
}
3311
3312
bool Optimizer::run(const Shader &pShader, const SpecConstant *pNewSpecConstants, uint32_t pNewSpecConstantCount, std::vector<uint8_t> &pOptimizedData, Options pOptions) {
3313
thread_local std::vector<uint32_t> instructionAdjacentListIndices;
3314
thread_local std::vector<uint32_t> instructionInDegrees;
3315
thread_local std::vector<uint32_t> instructionOutDegrees;
3316
thread_local std::vector<ListNode> listNodes;
3317
thread_local std::vector<Resolution> resolutions;
3318
OptimizerContext context = { pShader, instructionAdjacentListIndices, instructionInDegrees, instructionOutDegrees, listNodes, resolutions, pOptimizedData, pOptions };
3319
if (!optimizerPrepareData(context)) {
3320
return false;
3321
}
3322
3323
if (!optimizerPatchSpecializationConstants(pNewSpecConstants, pNewSpecConstantCount, context)) {
3324
return false;
3325
}
3326
3327
if (!optimizerRunEvaluationPass(context)) {
3328
return false;
3329
}
3330
3331
if (!optimizerRemoveUnusedVariables(context)) {
3332
return false;
3333
}
3334
3335
if (!optimizerRemoveUnusedDecorations(context)) {
3336
return false;
3337
}
3338
3339
// FIXME: For some reason, it seems that based on the order of the resolution, OpPhis can be compacted
3340
// before all their preceding blocks have been evaluated in time whether they should be deleted or not.
3341
// This pass merely re-runs the compaction step as a safeguard to remove any stale references. There's
3342
// potential for further optimization if this is fixed properly.
3343
if (!optimizerCompactPhis(context)) {
3344
return false;
3345
}
3346
3347
if (!optimizerCompactData(context)) {
3348
return false;
3349
}
3350
3351
return true;
3352
}
3353
}; //namespace respv
3354
3355