Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/glslang/SPIRV/SpvPostProcess.cpp
9902 views
1
//
2
// Copyright (C) 2018 Google, Inc.
3
//
4
// All rights reserved.
5
//
6
// Redistribution and use in source and binary forms, with or without
7
// modification, are permitted provided that the following conditions
8
// are met:
9
//
10
// Redistributions of source code must retain the above copyright
11
// notice, this list of conditions and the following disclaimer.
12
//
13
// Redistributions in binary form must reproduce the above
14
// copyright notice, this list of conditions and the following
15
// disclaimer in the documentation and/or other materials provided
16
// with the distribution.
17
//
18
// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
19
// contributors may be used to endorse or promote products derived
20
// from this software without specific prior written permission.
21
//
22
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33
// POSSIBILITY OF SUCH DAMAGE.
34
35
//
36
// Post-processing for SPIR-V IR, in internal form, not standard binary form.
37
//
38
39
#include <cassert>
40
#include <cstdlib>
41
42
#include <unordered_map>
43
#include <unordered_set>
44
#include <algorithm>
45
46
#include "SpvBuilder.h"
47
#include "spirv.hpp"
48
49
namespace spv {
50
#include "GLSL.std.450.h"
51
#include "GLSL.ext.KHR.h"
52
#include "GLSL.ext.EXT.h"
53
#include "GLSL.ext.AMD.h"
54
#include "GLSL.ext.NV.h"
55
#include "GLSL.ext.ARM.h"
56
#include "GLSL.ext.QCOM.h"
57
}
58
59
namespace spv {
60
61
// Hook to visit each operand type and result type of an instruction.
// Will be called multiple times for one instruction, once for each typed
// operand and the result.
void Builder::postProcessType(const Instruction& inst, Id typeId)
{
    // Characterize the type being questioned: reduce it to its most basic
    // scalar class, and get the scalar bit width when it is int/float.
    Id basicTypeOp = getMostBasicTypeClass(typeId);
    int width = 0;
    if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)
        width = getScalarTypeWidth(typeId);

    // Do opcode-specific checks
    switch (inst.getOpCode()) {
    case OpLoad:
    case OpStore:
        if (basicTypeOp == OpTypeStruct) {
            // Whole-struct load/store: any contained small type requires the
            // corresponding full-width capability.
            if (containsType(typeId, OpTypeInt, 8))
                addCapability(CapabilityInt8);
            if (containsType(typeId, OpTypeInt, 16))
                addCapability(CapabilityInt16);
            if (containsType(typeId, OpTypeFloat, 16))
                addCapability(CapabilityFloat16);
        } else {
            // Scalar/vector load/store: whether a capability is needed depends
            // on the storage class of the pointer operand (operand 0).
            StorageClass storageClass = getStorageClass(inst.getIdOperand(0));
            if (width == 8) {
                // These storage classes are presumably covered by the 8-bit
                // storage capabilities added elsewhere, so no general Int8
                // capability is required for them.
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                    break;
                default:
                    addCapability(CapabilityInt8);
                    break;
                }
            } else if (width == 16) {
                // Same idea for 16-bit access; Input/Output are additionally
                // exempt here (16-bit I/O storage).
                switch (storageClass) {
                case StorageClassPhysicalStorageBufferEXT:
                case StorageClassUniform:
                case StorageClassStorageBuffer:
                case StorageClassPushConstant:
                case StorageClassInput:
                case StorageClassOutput:
                    break;
                default:
                    if (basicTypeOp == OpTypeInt)
                        addCapability(CapabilityInt16);
                    if (basicTypeOp == OpTypeFloat)
                        addCapability(CapabilityFloat16);
                    break;
                }
            }
        }
        break;
    case OpCopyObject:
        // A copy does not itself operate on the small type; no capability needed.
        break;
    case OpFConvert:
    case OpSConvert:
    case OpUConvert:
        // Look for any 8/16-bit storage capabilities. If there are none, assume that
        // the convert instruction requires the Float16/Int8/16 capability.
        if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStorageInputOutput16 ||
                    cap == spv::CapabilityStoragePushConstant16 ||
                    cap == spv::CapabilityStorageUniformBufferBlock16 ||
                    cap == spv::CapabilityStorageUniform16) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                if (containsType(typeId, OpTypeFloat, 16))
                    addCapability(CapabilityFloat16);
                if (containsType(typeId, OpTypeInt, 16))
                    addCapability(CapabilityInt16);
            }
        }
        if (containsType(typeId, OpTypeInt, 8)) {
            bool foundStorage = false;
            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
                spv::Capability cap = *it;
                if (cap == spv::CapabilityStoragePushConstant8 ||
                    cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
                    cap == spv::CapabilityStorageBuffer8BitAccess) {
                    foundStorage = true;
                    break;
                }
            }
            if (!foundStorage) {
                addCapability(CapabilityInt8);
            }
        }
        break;
    case OpExtInst:
        // Immediate operand 1 of OpExtInst is the extended-instruction opcode.
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450Frexp:
        case GLSLstd450FrexpStruct:
            // Before SPIR-V 1.3, 16-bit int results of frexp need the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeInt, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_int16);
            break;
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            // Before SPIR-V 1.3, half-float interpolation needs the AMD extension.
            if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeFloat, 16))
                addExtension(spv::E_SPV_AMD_gpu_shader_half_float);
            break;
        default:
            break;
        }
        break;
    case OpAccessChain:
    case OpPtrAccessChain:
        // Forming a pointer into a small type is fine; only non-pointer
        // (e.g. index) types of 8/16-bit int require the capability.
        if (isPointerType(typeId))
            break;
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
        }
        break;
    default:
        // Any other opcode genuinely operating on a narrow/wide scalar type
        // requires the matching capability.
        if (basicTypeOp == OpTypeInt) {
            if (width == 16)
                addCapability(CapabilityInt16);
            else if (width == 8)
                addCapability(CapabilityInt8);
            else if (width == 64)
                addCapability(CapabilityInt64);
        } else if (basicTypeOp == OpTypeFloat) {
            if (width == 16)
                addCapability(CapabilityFloat16);
            else if (width == 64)
                addCapability(CapabilityFloat64);
        }
        break;
    }
}
202
203
// Called for each instruction that resides in a block.
void Builder::postProcess(Instruction& inst)
{
    // Add capabilities based simply on the opcode.
    switch (inst.getOpCode()) {
    case OpExtInst:
        switch (inst.getImmediateOperand(1)) {
        case GLSLstd450InterpolateAtCentroid:
        case GLSLstd450InterpolateAtSample:
        case GLSLstd450InterpolateAtOffset:
            addCapability(CapabilityInterpolationFunction);
            break;
        default:
            break;
        }
        break;
    case OpDPdxFine:
    case OpDPdyFine:
    case OpFwidthFine:
    case OpDPdxCoarse:
    case OpDPdyCoarse:
    case OpFwidthCoarse:
        addCapability(CapabilityDerivativeControl);
        break;

    case OpImageQueryLod:
    case OpImageQuerySize:
    case OpImageQuerySizeLod:
    case OpImageQuerySamples:
    case OpImageQueryLevels:
        addCapability(CapabilityImageQuery);
        break;

    case OpGroupNonUniformPartitionNV:
        addExtension(E_SPV_NV_shader_subgroup_partitioned);
        addCapability(CapabilityGroupNonUniformPartitionedNV);
        break;

    case OpLoad:
    case OpStore:
        {
            // For any load/store to a PhysicalStorageBufferEXT, walk the accesschain
            // index list to compute the misalignment. The pre-existing alignment value
            // (set via Builder::AccessChain::alignment) only accounts for the base of
            // the reference type and any scalar component selection in the accesschain,
            // and this function computes the rest from the SPIR-V Offset decorations.
            Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));
            if (accessChain->getOpCode() == OpAccessChain) {
                Instruction *base = module.getInstruction(accessChain->getIdOperand(0));
                // Get the type of the base of the access chain. It must be a pointer type.
                Id typeId = base->getTypeId();
                Instruction *type = module.getInstruction(typeId);
                assert(type->getOpCode() == OpTypePointer);
                // Only PhysicalStorageBufferEXT pointers carry an Aligned memory
                // operand that needs updating; all other storage classes are done.
                if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {
                    break;
                }
                // Get the pointee type.
                typeId = type->getIdOperand(1);
                type = module.getInstruction(typeId);
                // Walk the index list for the access chain. For each index, find any
                // misalignment that can apply when accessing the member/element via
                // Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all
                // together.
                int alignment = 0;
                for (int i = 1; i < accessChain->getNumOperands(); ++i) {
                    Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));
                    if (type->getOpCode() == OpTypeStruct) {
                        // Struct member selection must use a constant index.
                        assert(idx->getOpCode() == OpConstant);
                        unsigned int c = idx->getImmediateOperand(0);

                        // Accumulate the Offset/MatrixStride decoration of member 'c'
                        // of this struct type into 'alignment'.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpMemberDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == c &&
                                (decoration.get()->getImmediateOperand(2) == DecorationOffset ||
                                 decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {
                                alignment |= decoration.get()->getImmediateOperand(3);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // get the next member type
                        typeId = type->getIdOperand(c);
                        type = module.getInstruction(typeId);
                    } else if (type->getOpCode() == OpTypeArray ||
                               type->getOpCode() == OpTypeRuntimeArray) {
                        // Accumulate this array type's ArrayStride decoration.
                        const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                            if (decoration.get()->getOpCode() == OpDecorate &&
                                decoration.get()->getIdOperand(0) == typeId &&
                                decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {
                                alignment |= decoration.get()->getImmediateOperand(2);
                            }
                        };
                        std::for_each(decorations.begin(), decorations.end(), function);
                        // Get the element type
                        typeId = type->getIdOperand(0);
                        type = module.getInstruction(typeId);
                    } else {
                        // Once we get to any non-aggregate type, we're done.
                        break;
                    }
                }
                assert(inst.getNumOperands() >= 3);
                // The memory-access mask is the operand after the pointer (and,
                // for OpStore, the object being stored); it must include Aligned.
                unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 2 : 1);
                assert(memoryAccess & MemoryAccessAlignedMask);
                static_cast<void>(memoryAccess);
                // Compute the index of the alignment operand.
                int alignmentIdx = 2;
                if (inst.getOpCode() == OpStore)
                    alignmentIdx++;
                // Merge new and old (mis)alignment
                alignment |= inst.getImmediateOperand(alignmentIdx);
                // Pick the LSB: the lowest set bit is the largest power-of-two
                // alignment guaranteed by all accumulated offsets/strides.
                alignment = alignment & ~(alignment & (alignment-1));
                // update the Aligned operand
                inst.setImmediateOperand(alignmentIdx, alignment);
            }
            break;
        }

    default:
        break;
    }

    // Checks based on type: visit the result type and the types of all id operands.
    if (inst.getTypeId() != NoType)
        postProcessType(inst, inst.getTypeId());
    for (int op = 0; op < inst.getNumOperands(); ++op) {
        if (inst.isIdOperand(op)) {
            // In blocks, these are always result ids, but we are relying on
            // getTypeId() to return NoType for things like OpLabel.
            if (getTypeId(inst.getIdOperand(op)) != NoType)
                postProcessType(inst, getTypeId(inst.getIdOperand(op)));
        }
    }
}
338
339
// comment in header
void Builder::postProcessCFG()
{
    // reachableBlocks is the set of blocks reached via control flow, or which are
    // unreachable continue targets or unreachable merges.
    std::unordered_set<const Block*> reachableBlocks;
    std::unordered_map<Block*, Block*> headerForUnreachableContinue;
    std::unordered_set<Block*> unreachableMerges;
    std::unordered_set<Id> unreachableDefinitions;
    // Collect IDs defined in unreachable blocks. For each function, label the
    // reachable blocks first. Then for each unreachable block, collect the
    // result IDs of the instructions in it.
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        Block* entry = f->getEntryBlock();
        // Visit blocks from the entry; the callback records reachability and
        // classifies dead continue targets / dead merges by their ReachReason.
        inReadableOrder(entry,
            [&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]
            (Block* b, ReachReason why, Block* header) {
                reachableBlocks.insert(b);
                if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;
                if (why == ReachDeadMerge) unreachableMerges.insert(b);
            });
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {
                auto ii = b->getInstructions().cbegin();
                ++ii; // Keep potential decorations on the label.
                for (; ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            } else if (reachableBlocks.count(b) == 0) {
                // The normal case for unreachable code. All definitions are considered dead.
                for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)
                    unreachableDefinitions.insert(ii->get()->getResultId());
            }
        }
    }

    // Modify unreachable merge blocks and unreachable continue targets.
    // Delete their contents.
    for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {
        (*mergeIter)->rewriteAsCanonicalUnreachableMerge();
    }
    for (auto continueIter = headerForUnreachableContinue.begin();
         continueIter != headerForUnreachableContinue.end();
         ++continueIter) {
        Block* continue_target = continueIter->first;
        Block* header = continueIter->second;
        continue_target->rewriteAsCanonicalUnreachableContinue(header);
    }

    // Remove unneeded decorations, for unreachable instructions
    decorations.erase(std::remove_if(decorations.begin(), decorations.end(),
        [&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool {
            Id decoration_id = I.get()->getIdOperand(0);
            return unreachableDefinitions.count(decoration_id) != 0;
        }),
        decorations.end());
}
397
398
// comment in header
void Builder::postProcessFeatures() {
    // Add per-instruction capabilities, extensions, etc.,

    // Look for any 8/16 bit type in physical storage buffer class, and set the
    // appropriate capability. This happens in createSpvVariable for other storage
    // classes, but there isn't always a variable for physical storage buffer.
    for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
        Instruction* type = groupedTypes[OpTypePointer][t];
        // Operand 0 of OpTypePointer is its storage class; operand 1 is the pointee.
        if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
            if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
                addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
                addCapability(spv::CapabilityStorageBuffer8BitAccess);
            }
            if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
                containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
                addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
                addCapability(spv::CapabilityStorageBuffer16BitAccess);
            }
        }
    }

    // process all block-contained instructions
    for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {
        Function* f = *fi;
        for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {
            Block* b = *bi;
            for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)
                postProcess(*ii->get());

            // For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether
            // there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the
            // default.
            for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {
                const Instruction& inst = *vi->get();
                Id resultId = inst.getResultId();
                if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {
                    bool foundDecoration = false;
                    // Scan all decorations for RestrictPointer/AliasedPointer on this id.
                    const auto function = [&](const std::unique_ptr<Instruction>& decoration) {
                        if (decoration.get()->getIdOperand(0) == resultId &&
                            decoration.get()->getOpCode() == OpDecorate &&
                            (decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||
                             decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {
                            foundDecoration = true;
                        }
                    };
                    std::for_each(decorations.begin(), decorations.end(), function);
                    if (!foundDecoration) {
                        addDecoration(resultId, spv::DecorationAliasedPointerEXT);
                    }
                }
            }
        }
    }

    // If any Vulkan memory model-specific functionality is used, update the
    // OpMemoryModel to match.
    if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) {
        memoryModel = spv::MemoryModelVulkanKHR;
        addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
    }

    // Add Aliased decoration if there's more than one Workgroup Block variable.
    if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {
        // NOTE(review): this path assumes exactly one entry point — confirm
        // that holds for all callers using explicit workgroup layout.
        assert(entryPoints.size() == 1);
        auto &ep = entryPoints[0];

        // Collect the entry point's interface variables that live in Workgroup storage.
        std::vector<Id> workgroup_variables;
        for (int i = 0; i < (int)ep->getNumOperands(); i++) {
            if (!ep->isIdOperand(i))
                continue;

            const Id id = ep->getIdOperand(i);
            const Instruction *instr = module.getInstruction(id);
            if (instr->getOpCode() != spv::OpVariable)
                continue;

            if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup)
                workgroup_variables.push_back(id);
        }

        if (workgroup_variables.size() > 1) {
            for (size_t i = 0; i < workgroup_variables.size(); i++)
                addDecoration(workgroup_variables[i], spv::DecorationAliased);
        }
    }
}
485
486
// comment in header
487
void Builder::postProcess(bool compileOnly)
488
{
489
// postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint
490
if (!compileOnly)
491
postProcessCFG();
492
493
postProcessFeatures();
494
}
495
496
}; // end spv namespace
497
498