// Path: blob/master/thirdparty/glslang/SPIRV/SpvPostProcess.cpp
// 9902 views
//1// Copyright (C) 2018 Google, Inc.2//3// All rights reserved.4//5// Redistribution and use in source and binary forms, with or without6// modification, are permitted provided that the following conditions7// are met:8//9// Redistributions of source code must retain the above copyright10// notice, this list of conditions and the following disclaimer.11//12// Redistributions in binary form must reproduce the above13// copyright notice, this list of conditions and the following14// disclaimer in the documentation and/or other materials provided15// with the distribution.16//17// Neither the name of 3Dlabs Inc. Ltd. nor the names of its18// contributors may be used to endorse or promote products derived19// from this software without specific prior written permission.20//21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS24// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE25// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,26// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,27// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;28// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER29// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT30// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN31// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE32// POSSIBILITY OF SUCH DAMAGE.3334//35// Post-processing for SPIR-V IR, in internal form, not standard binary form.36//3738#include <cassert>39#include <cstdlib>4041#include <unordered_map>42#include <unordered_set>43#include <algorithm>4445#include "SpvBuilder.h"46#include "spirv.hpp"4748namespace spv {49#include "GLSL.std.450.h"50#include "GLSL.ext.KHR.h"51#include "GLSL.ext.EXT.h"52#include "GLSL.ext.AMD.h"53#include "GLSL.ext.NV.h"54#include "GLSL.ext.ARM.h"55#include "GLSL.ext.QCOM.h"56}5758namespace spv {5960// Hook to visit each operand type and result type of an instruction.61// Will be called multiple times for one instruction, once for each typed62// operand and the result.63void Builder::postProcessType(const Instruction& inst, Id typeId)64{65// Characterize the type being questioned66Id basicTypeOp = getMostBasicTypeClass(typeId);67int width = 0;68if (basicTypeOp == OpTypeFloat || basicTypeOp == OpTypeInt)69width = getScalarTypeWidth(typeId);7071// Do opcode-specific checks72switch (inst.getOpCode()) {73case OpLoad:74case OpStore:75if (basicTypeOp == OpTypeStruct) {76if (containsType(typeId, OpTypeInt, 8))77addCapability(CapabilityInt8);78if (containsType(typeId, OpTypeInt, 16))79addCapability(CapabilityInt16);80if (containsType(typeId, OpTypeFloat, 16))81addCapability(CapabilityFloat16);82} else {83StorageClass storageClass = getStorageClass(inst.getIdOperand(0));84if (width == 8) {85switch (storageClass) {86case 
StorageClassPhysicalStorageBufferEXT:87case StorageClassUniform:88case StorageClassStorageBuffer:89case StorageClassPushConstant:90break;91default:92addCapability(CapabilityInt8);93break;94}95} else if (width == 16) {96switch (storageClass) {97case StorageClassPhysicalStorageBufferEXT:98case StorageClassUniform:99case StorageClassStorageBuffer:100case StorageClassPushConstant:101case StorageClassInput:102case StorageClassOutput:103break;104default:105if (basicTypeOp == OpTypeInt)106addCapability(CapabilityInt16);107if (basicTypeOp == OpTypeFloat)108addCapability(CapabilityFloat16);109break;110}111}112}113break;114case OpCopyObject:115break;116case OpFConvert:117case OpSConvert:118case OpUConvert:119// Look for any 8/16-bit storage capabilities. If there are none, assume that120// the convert instruction requires the Float16/Int8/16 capability.121if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {122bool foundStorage = false;123for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {124spv::Capability cap = *it;125if (cap == spv::CapabilityStorageInputOutput16 ||126cap == spv::CapabilityStoragePushConstant16 ||127cap == spv::CapabilityStorageUniformBufferBlock16 ||128cap == spv::CapabilityStorageUniform16) {129foundStorage = true;130break;131}132}133if (!foundStorage) {134if (containsType(typeId, OpTypeFloat, 16))135addCapability(CapabilityFloat16);136if (containsType(typeId, OpTypeInt, 16))137addCapability(CapabilityInt16);138}139}140if (containsType(typeId, OpTypeInt, 8)) {141bool foundStorage = false;142for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {143spv::Capability cap = *it;144if (cap == spv::CapabilityStoragePushConstant8 ||145cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||146cap == spv::CapabilityStorageBuffer8BitAccess) {147foundStorage = true;148break;149}150}151if (!foundStorage) {152addCapability(CapabilityInt8);153}154}155break;156case OpExtInst:157switch 
(inst.getImmediateOperand(1)) {158case GLSLstd450Frexp:159case GLSLstd450FrexpStruct:160if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeInt, 16))161addExtension(spv::E_SPV_AMD_gpu_shader_int16);162break;163case GLSLstd450InterpolateAtCentroid:164case GLSLstd450InterpolateAtSample:165case GLSLstd450InterpolateAtOffset:166if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, OpTypeFloat, 16))167addExtension(spv::E_SPV_AMD_gpu_shader_half_float);168break;169default:170break;171}172break;173case OpAccessChain:174case OpPtrAccessChain:175if (isPointerType(typeId))176break;177if (basicTypeOp == OpTypeInt) {178if (width == 16)179addCapability(CapabilityInt16);180else if (width == 8)181addCapability(CapabilityInt8);182}183break;184default:185if (basicTypeOp == OpTypeInt) {186if (width == 16)187addCapability(CapabilityInt16);188else if (width == 8)189addCapability(CapabilityInt8);190else if (width == 64)191addCapability(CapabilityInt64);192} else if (basicTypeOp == OpTypeFloat) {193if (width == 16)194addCapability(CapabilityFloat16);195else if (width == 64)196addCapability(CapabilityFloat64);197}198break;199}200}201202// Called for each instruction that resides in a block.203void Builder::postProcess(Instruction& inst)204{205// Add capabilities based simply on the opcode.206switch (inst.getOpCode()) {207case OpExtInst:208switch (inst.getImmediateOperand(1)) {209case GLSLstd450InterpolateAtCentroid:210case GLSLstd450InterpolateAtSample:211case GLSLstd450InterpolateAtOffset:212addCapability(CapabilityInterpolationFunction);213break;214default:215break;216}217break;218case OpDPdxFine:219case OpDPdyFine:220case OpFwidthFine:221case OpDPdxCoarse:222case OpDPdyCoarse:223case OpFwidthCoarse:224addCapability(CapabilityDerivativeControl);225break;226227case OpImageQueryLod:228case OpImageQuerySize:229case OpImageQuerySizeLod:230case OpImageQuerySamples:231case OpImageQueryLevels:232addCapability(CapabilityImageQuery);233break;234235case 
OpGroupNonUniformPartitionNV:236addExtension(E_SPV_NV_shader_subgroup_partitioned);237addCapability(CapabilityGroupNonUniformPartitionedNV);238break;239240case OpLoad:241case OpStore:242{243// For any load/store to a PhysicalStorageBufferEXT, walk the accesschain244// index list to compute the misalignment. The pre-existing alignment value245// (set via Builder::AccessChain::alignment) only accounts for the base of246// the reference type and any scalar component selection in the accesschain,247// and this function computes the rest from the SPIR-V Offset decorations.248Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));249if (accessChain->getOpCode() == OpAccessChain) {250Instruction *base = module.getInstruction(accessChain->getIdOperand(0));251// Get the type of the base of the access chain. It must be a pointer type.252Id typeId = base->getTypeId();253Instruction *type = module.getInstruction(typeId);254assert(type->getOpCode() == OpTypePointer);255if (type->getImmediateOperand(0) != StorageClassPhysicalStorageBufferEXT) {256break;257}258// Get the pointee type.259typeId = type->getIdOperand(1);260type = module.getInstruction(typeId);261// Walk the index list for the access chain. 
For each index, find any262// misalignment that can apply when accessing the member/element via263// Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all264// together.265int alignment = 0;266for (int i = 1; i < accessChain->getNumOperands(); ++i) {267Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));268if (type->getOpCode() == OpTypeStruct) {269assert(idx->getOpCode() == OpConstant);270unsigned int c = idx->getImmediateOperand(0);271272const auto function = [&](const std::unique_ptr<Instruction>& decoration) {273if (decoration.get()->getOpCode() == OpMemberDecorate &&274decoration.get()->getIdOperand(0) == typeId &&275decoration.get()->getImmediateOperand(1) == c &&276(decoration.get()->getImmediateOperand(2) == DecorationOffset ||277decoration.get()->getImmediateOperand(2) == DecorationMatrixStride)) {278alignment |= decoration.get()->getImmediateOperand(3);279}280};281std::for_each(decorations.begin(), decorations.end(), function);282// get the next member type283typeId = type->getIdOperand(c);284type = module.getInstruction(typeId);285} else if (type->getOpCode() == OpTypeArray ||286type->getOpCode() == OpTypeRuntimeArray) {287const auto function = [&](const std::unique_ptr<Instruction>& decoration) {288if (decoration.get()->getOpCode() == OpDecorate &&289decoration.get()->getIdOperand(0) == typeId &&290decoration.get()->getImmediateOperand(1) == DecorationArrayStride) {291alignment |= decoration.get()->getImmediateOperand(2);292}293};294std::for_each(decorations.begin(), decorations.end(), function);295// Get the element type296typeId = type->getIdOperand(0);297type = module.getInstruction(typeId);298} else {299// Once we get to any non-aggregate type, we're done.300break;301}302}303assert(inst.getNumOperands() >= 3);304unsigned int memoryAccess = inst.getImmediateOperand((inst.getOpCode() == OpStore) ? 
2 : 1);305assert(memoryAccess & MemoryAccessAlignedMask);306static_cast<void>(memoryAccess);307// Compute the index of the alignment operand.308int alignmentIdx = 2;309if (inst.getOpCode() == OpStore)310alignmentIdx++;311// Merge new and old (mis)alignment312alignment |= inst.getImmediateOperand(alignmentIdx);313// Pick the LSB314alignment = alignment & ~(alignment & (alignment-1));315// update the Aligned operand316inst.setImmediateOperand(alignmentIdx, alignment);317}318break;319}320321default:322break;323}324325// Checks based on type326if (inst.getTypeId() != NoType)327postProcessType(inst, inst.getTypeId());328for (int op = 0; op < inst.getNumOperands(); ++op) {329if (inst.isIdOperand(op)) {330// In blocks, these are always result ids, but we are relying on331// getTypeId() to return NoType for things like OpLabel.332if (getTypeId(inst.getIdOperand(op)) != NoType)333postProcessType(inst, getTypeId(inst.getIdOperand(op)));334}335}336}337338// comment in header339void Builder::postProcessCFG()340{341// reachableBlocks is the set of blockss reached via control flow, or which are342// unreachable continue targert or unreachable merge.343std::unordered_set<const Block*> reachableBlocks;344std::unordered_map<Block*, Block*> headerForUnreachableContinue;345std::unordered_set<Block*> unreachableMerges;346std::unordered_set<Id> unreachableDefinitions;347// Collect IDs defined in unreachable blocks. For each function, label the348// reachable blocks first. 
Then for each unreachable block, collect the349// result IDs of the instructions in it.350for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {351Function* f = *fi;352Block* entry = f->getEntryBlock();353inReadableOrder(entry,354[&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]355(Block* b, ReachReason why, Block* header) {356reachableBlocks.insert(b);357if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;358if (why == ReachDeadMerge) unreachableMerges.insert(b);359});360for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {361Block* b = *bi;362if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {363auto ii = b->getInstructions().cbegin();364++ii; // Keep potential decorations on the label.365for (; ii != b->getInstructions().cend(); ++ii)366unreachableDefinitions.insert(ii->get()->getResultId());367} else if (reachableBlocks.count(b) == 0) {368// The normal case for unreachable code. 
All definitions are considered dead.369for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)370unreachableDefinitions.insert(ii->get()->getResultId());371}372}373}374375// Modify unreachable merge blocks and unreachable continue targets.376// Delete their contents.377for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {378(*mergeIter)->rewriteAsCanonicalUnreachableMerge();379}380for (auto continueIter = headerForUnreachableContinue.begin();381continueIter != headerForUnreachableContinue.end();382++continueIter) {383Block* continue_target = continueIter->first;384Block* header = continueIter->second;385continue_target->rewriteAsCanonicalUnreachableContinue(header);386}387388// Remove unneeded decorations, for unreachable instructions389decorations.erase(std::remove_if(decorations.begin(), decorations.end(),390[&unreachableDefinitions](std::unique_ptr<Instruction>& I) -> bool {391Id decoration_id = I.get()->getIdOperand(0);392return unreachableDefinitions.count(decoration_id) != 0;393}),394decorations.end());395}396397// comment in header398void Builder::postProcessFeatures() {399// Add per-instruction capabilities, extensions, etc.,400401// Look for any 8/16 bit type in physical storage buffer class, and set the402// appropriate capability. 
This happens in createSpvVariable for other storage403// classes, but there isn't always a variable for physical storage buffer.404for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {405Instruction* type = groupedTypes[OpTypePointer][t];406if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {407if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {408addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);409addCapability(spv::CapabilityStorageBuffer8BitAccess);410}411if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||412containsType(type->getIdOperand(1), OpTypeFloat, 16)) {413addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);414addCapability(spv::CapabilityStorageBuffer16BitAccess);415}416}417}418419// process all block-contained instructions420for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {421Function* f = *fi;422for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {423Block* b = *bi;424for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)425postProcess(*ii->get());426427// For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether428// there is an existing restrict/aliased decoration. 
If we don't find one, add Aliased as the429// default.430for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {431const Instruction& inst = *vi->get();432Id resultId = inst.getResultId();433if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {434bool foundDecoration = false;435const auto function = [&](const std::unique_ptr<Instruction>& decoration) {436if (decoration.get()->getIdOperand(0) == resultId &&437decoration.get()->getOpCode() == OpDecorate &&438(decoration.get()->getImmediateOperand(1) == spv::DecorationAliasedPointerEXT ||439decoration.get()->getImmediateOperand(1) == spv::DecorationRestrictPointerEXT)) {440foundDecoration = true;441}442};443std::for_each(decorations.begin(), decorations.end(), function);444if (!foundDecoration) {445addDecoration(resultId, spv::DecorationAliasedPointerEXT);446}447}448}449}450}451452// If any Vulkan memory model-specific functionality is used, update the453// OpMemoryModel to match.454if (capabilities.find(spv::CapabilityVulkanMemoryModelKHR) != capabilities.end()) {455memoryModel = spv::MemoryModelVulkanKHR;456addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);457}458459// Add Aliased decoration if there's more than one Workgroup Block variable.460if (capabilities.find(spv::CapabilityWorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {461assert(entryPoints.size() == 1);462auto &ep = entryPoints[0];463464std::vector<Id> workgroup_variables;465for (int i = 0; i < (int)ep->getNumOperands(); i++) {466if (!ep->isIdOperand(i))467continue;468469const Id id = ep->getIdOperand(i);470const Instruction *instr = module.getInstruction(id);471if (instr->getOpCode() != spv::OpVariable)472continue;473474if (instr->getImmediateOperand(0) == spv::StorageClassWorkgroup)475workgroup_variables.push_back(id);476}477478if (workgroup_variables.size() > 1) {479for (size_t i = 0; i < workgroup_variables.size(); i++)480addDecoration(workgroup_variables[i], 
spv::DecorationAliased);481}482}483}484485// comment in header486void Builder::postProcess(bool compileOnly)487{488// postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint489if (!compileOnly)490postProcessCFG();491492postProcessFeatures();493}494495}; // end spv namespace496497498