Path: blob/master/thirdparty/glslang/SPIRV/SpvPostProcess.cpp
21011 views
//1// Copyright (C) 2018 Google, Inc.2//3// All rights reserved.4//5// Redistribution and use in source and binary forms, with or without6// modification, are permitted provided that the following conditions7// are met:8//9// Redistributions of source code must retain the above copyright10// notice, this list of conditions and the following disclaimer.11//12// Redistributions in binary form must reproduce the above13// copyright notice, this list of conditions and the following14// disclaimer in the documentation and/or other materials provided15// with the distribution.16//17// Neither the name of 3Dlabs Inc. Ltd. nor the names of its18// contributors may be used to endorse or promote products derived19// from this software without specific prior written permission.20//21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS24// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE25// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,26// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,27// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;28// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER29// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT30// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN31// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE32// POSSIBILITY OF SUCH DAMAGE.3334//35// Post-processing for SPIR-V IR, in internal form, not standard binary form.36//3738#include <cassert>39#include <cstdlib>4041#include <unordered_map>42#include <unordered_set>43#include <algorithm>4445#include "SPIRV/spvIR.h"46#include "SpvBuilder.h"47#include "spirv.hpp11"48#include "spvUtil.h"4950namespace spv {51#include "GLSL.std.450.h"52#include "GLSL.ext.KHR.h"53#include "GLSL.ext.EXT.h"54#include "GLSL.ext.AMD.h"55#include "GLSL.ext.NV.h"56#include "GLSL.ext.ARM.h"57#include "GLSL.ext.QCOM.h"58}5960namespace spv {6162// Hook to visit each operand type and result type of an instruction.63// Will be called multiple times for one instruction, once for each typed64// operand and the result.65void Builder::postProcessType(const Instruction& inst, Id typeId)66{67// Characterize the type being questioned68Op basicTypeOp = getMostBasicTypeClass(typeId);69int width = 0;70if (basicTypeOp == Op::OpTypeFloat || basicTypeOp == Op::OpTypeInt)71width = getScalarTypeWidth(typeId);7273// Do opcode-specific checks74switch (inst.getOpCode()) {75case Op::OpLoad:76case Op::OpStore:77if (basicTypeOp == Op::OpTypeStruct) {78if (containsType(typeId, Op::OpTypeInt, 8))79addCapability(Capability::Int8);80if (containsType(typeId, Op::OpTypeInt, 16))81addCapability(Capability::Int16);82if (containsType(typeId, Op::OpTypeFloat, 16))83addCapability(Capability::Float16);84} else {85StorageClass storageClass = getStorageClass(inst.getIdOperand(0));86if (width == 8) {87switch (storageClass) {88case StorageClass::PhysicalStorageBufferEXT:89case StorageClass::Uniform:90case StorageClass::StorageBuffer:91case StorageClass::PushConstant:92break;93default:94addCapability(Capability::Int8);95break;96}97} else if (width == 16) {98switch (storageClass) {99case StorageClass::PhysicalStorageBufferEXT:100case StorageClass::Uniform:101case StorageClass::StorageBuffer:102case StorageClass::PushConstant:103case StorageClass::Input:104case StorageClass::Output:105break;106default:107if (basicTypeOp == Op::OpTypeInt)108addCapability(Capability::Int16);109if (basicTypeOp == Op::OpTypeFloat)110addCapability(Capability::Float16);111break;112}113}114}115break;116case Op::OpCopyObject:117break;118case Op::OpFConvert:119case Op::OpSConvert:120case Op::OpUConvert:121// Look for any 8/16-bit storage capabilities. If there are none, assume that122// the convert instruction requires the Float16/Int8/16 capability.123if (containsType(typeId, Op::OpTypeFloat, 16) || containsType(typeId, Op::OpTypeInt, 16)) {124bool foundStorage = false;125for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {126spv::Capability cap = *it;127if (cap == spv::Capability::StorageInputOutput16 ||128cap == spv::Capability::StoragePushConstant16 ||129cap == spv::Capability::StorageUniformBufferBlock16 ||130cap == spv::Capability::StorageUniform16) {131foundStorage = true;132break;133}134}135if (!foundStorage) {136if (containsType(typeId, Op::OpTypeFloat, 16))137addCapability(Capability::Float16);138if (containsType(typeId, Op::OpTypeInt, 16))139addCapability(Capability::Int16);140}141}142if (containsType(typeId, Op::OpTypeInt, 8)) {143bool foundStorage = false;144for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {145spv::Capability cap = *it;146if (cap == spv::Capability::StoragePushConstant8 ||147cap == spv::Capability::UniformAndStorageBuffer8BitAccess ||148cap == spv::Capability::StorageBuffer8BitAccess) {149foundStorage = true;150break;151}152}153if (!foundStorage) {154addCapability(Capability::Int8);155}156}157break;158case Op::OpExtInst:159switch (inst.getImmediateOperand(1)) {160case GLSLstd450Frexp:161case GLSLstd450FrexpStruct:162if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, Op::OpTypeInt, 16))163addExtension(spv::E_SPV_AMD_gpu_shader_int16);164break;165case GLSLstd450InterpolateAtCentroid:166case GLSLstd450InterpolateAtSample:167case GLSLstd450InterpolateAtOffset:168if (getSpvVersion() < spv::Spv_1_3 && containsType(typeId, Op::OpTypeFloat, 16))169addExtension(spv::E_SPV_AMD_gpu_shader_half_float);170break;171default:172break;173}174break;175case Op::OpAccessChain:176case Op::OpPtrAccessChain:177if (isPointerType(typeId))178break;179if (basicTypeOp == Op::OpTypeInt) {180if (width == 16)181addCapability(Capability::Int16);182else if (width == 8)183addCapability(Capability::Int8);184}185break;186default:187if (basicTypeOp == Op::OpTypeInt) {188if (width == 16)189addCapability(Capability::Int16);190else if (width == 8)191addCapability(Capability::Int8);192else if (width == 64)193addCapability(Capability::Int64);194} else if (basicTypeOp == Op::OpTypeFloat) {195if (width == 16)196addCapability(Capability::Float16);197else if (width == 64)198addCapability(Capability::Float64);199}200break;201}202}203204unsigned int Builder::postProcessGetLargestScalarSize(const Instruction& type)205{206switch (type.getOpCode()) {207case Op::OpTypeBool:208return 1;209case Op::OpTypeInt:210case Op::OpTypeFloat:211return type.getImmediateOperand(0) / 8;212case Op::OpTypePointer:213return 8;214case Op::OpTypeVector:215case Op::OpTypeMatrix:216case Op::OpTypeArray:217case Op::OpTypeRuntimeArray: {218const Instruction* elem_type = module.getInstruction(type.getIdOperand(0));219return postProcessGetLargestScalarSize(*elem_type);220}221case Op::OpTypeStruct: {222unsigned int largest = 0;223for (int i = 0; i < type.getNumOperands(); ++i) {224const Instruction* elem_type = module.getInstruction(type.getIdOperand(i));225unsigned int elem_size = postProcessGetLargestScalarSize(*elem_type);226largest = std::max(largest, elem_size);227}228return largest;229}230default:231return 0;232}233}234235// Called for each instruction that resides in a block.236void Builder::postProcess(Instruction& inst)237{238// Add capabilities based simply on the opcode.239switch (inst.getOpCode()) {240case Op::OpExtInst:241switch (inst.getImmediateOperand(1)) {242case GLSLstd450InterpolateAtCentroid:243case GLSLstd450InterpolateAtSample:244case GLSLstd450InterpolateAtOffset:245addCapability(Capability::InterpolationFunction);246break;247default:248break;249}250break;251case Op::OpDPdxFine:252case Op::OpDPdyFine:253case Op::OpFwidthFine:254case Op::OpDPdxCoarse:255case Op::OpDPdyCoarse:256case Op::OpFwidthCoarse:257addCapability(Capability::DerivativeControl);258break;259260case Op::OpImageQueryLod:261case Op::OpImageQuerySize:262case Op::OpImageQuerySizeLod:263case Op::OpImageQuerySamples:264case Op::OpImageQueryLevels:265addCapability(Capability::ImageQuery);266break;267268case Op::OpGroupNonUniformPartitionNV:269addExtension(E_SPV_NV_shader_subgroup_partitioned);270addCapability(Capability::GroupNonUniformPartitionedNV);271break;272273case Op::OpLoad:274case Op::OpStore:275{276// For any load/store to a PhysicalStorageBufferEXT, walk the accesschain277// index list to compute the misalignment. The pre-existing alignment value278// (set via Builder::AccessChain::alignment) only accounts for the base of279// the reference type and any scalar component selection in the accesschain,280// and this function computes the rest from the SPIR-V Offset decorations.281Instruction *accessChain = module.getInstruction(inst.getIdOperand(0));282if (accessChain->getOpCode() == Op::OpAccessChain) {283const Instruction* base = module.getInstruction(accessChain->getIdOperand(0));284// Get the type of the base of the access chain. It must be a pointer type.285Id typeId = base->getTypeId();286Instruction *type = module.getInstruction(typeId);287assert(type->getOpCode() == Op::OpTypePointer);288if (type->getImmediateOperand(0) != StorageClass::PhysicalStorageBuffer) {289break;290}291// Get the pointee type.292typeId = type->getIdOperand(1);293type = module.getInstruction(typeId);294// Walk the index list for the access chain. For each index, find any295// misalignment that can apply when accessing the member/element via296// Offset/ArrayStride/MatrixStride decorations, and bitwise OR them all297// together.298int alignment = 0;299bool first_struct_elem = false;300for (int i = 1; i < accessChain->getNumOperands(); ++i) {301Instruction *idx = module.getInstruction(accessChain->getIdOperand(i));302if (type->getOpCode() == Op::OpTypeStruct) {303assert(idx->getOpCode() == Op::OpConstant);304unsigned int c = idx->getImmediateOperand(0);305306const auto function = [&](const std::unique_ptr<Instruction>& decoration) {307if (decoration.get()->getOpCode() == Op::OpMemberDecorate &&308decoration.get()->getIdOperand(0) == typeId &&309decoration.get()->getImmediateOperand(1) == c &&310(decoration.get()->getImmediateOperand(2) == Decoration::Offset ||311decoration.get()->getImmediateOperand(2) == Decoration::MatrixStride)) {312unsigned int opernad_value = decoration.get()->getImmediateOperand(3);313alignment |= opernad_value;314if (opernad_value == 0 &&315decoration.get()->getImmediateOperand(2) == Decoration::Offset) {316first_struct_elem = true;317}318}319};320std::for_each(decorations.begin(), decorations.end(), function);321// get the next member type322typeId = type->getIdOperand(c);323type = module.getInstruction(typeId);324} else if (type->getOpCode() == Op::OpTypeArray ||325type->getOpCode() == Op::OpTypeRuntimeArray) {326const auto function = [&](const std::unique_ptr<Instruction>& decoration) {327if (decoration.get()->getOpCode() == Op::OpDecorate &&328decoration.get()->getIdOperand(0) == typeId &&329decoration.get()->getImmediateOperand(1) == Decoration::ArrayStride) {330alignment |= decoration.get()->getImmediateOperand(2);331}332};333std::for_each(decorations.begin(), decorations.end(), function);334// Get the element type335typeId = type->getIdOperand(0);336type = module.getInstruction(typeId);337} else {338// Once we get to any non-aggregate type, we're done.339break;340}341}342assert(inst.getNumOperands() >= 3);343const bool is_store = inst.getOpCode() == Op::OpStore;344auto const memoryAccess = (MemoryAccessMask)inst.getImmediateOperand(is_store ? 2 : 1);345assert(anySet(memoryAccess, MemoryAccessMask::Aligned));346static_cast<void>(memoryAccess);347348// Compute the index of the alignment operand.349int alignmentIdx = 2;350if (is_store)351alignmentIdx++;352// Merge new and old (mis)alignment353alignment |= inst.getImmediateOperand(alignmentIdx);354355if (!is_store) {356Instruction* inst_type = module.getInstruction(inst.getTypeId());357if (inst_type->getOpCode() == Op::OpTypePointer &&358inst_type->getImmediateOperand(0) == StorageClass::PhysicalStorageBuffer) {359// This means we are loading a pointer which means need to ensure it is at least 8-byte aligned360// See https://github.com/KhronosGroup/glslang/issues/4084361// In case the alignment is currently 4, need to ensure it is 8 before grabbing the LSB362alignment |= 8;363alignment &= 8;364}365}366367// Pick the LSB368alignment = alignment & ~(alignment & (alignment-1));369370// The edge case we find is when copying a struct to another struct, we never find the alignment anywhere,371// so in this case, fallback to doing a full size lookup on the type372if (alignment == 0 && first_struct_elem) {373// Quick get the struct type back374const Instruction* pointer_type = module.getInstruction(base->getTypeId());375const Instruction* struct_type = module.getInstruction(pointer_type->getIdOperand(1));376assert(struct_type->getOpCode() == Op::OpTypeStruct);377378const Instruction* elem_type = module.getInstruction(struct_type->getIdOperand(0));379unsigned int largest_scalar = postProcessGetLargestScalarSize(*elem_type);380if (largest_scalar != 0) {381alignment = largest_scalar;382} else {383alignment = 16; // fallback if can't determine a godo alignment384}385}386// update the Aligned operand387assert(alignment != 0);388inst.setImmediateOperand(alignmentIdx, alignment);389}390break;391}392393default:394break;395}396397// Checks based on type398if (inst.getTypeId() != NoType)399postProcessType(inst, inst.getTypeId());400for (int op = 0; op < inst.getNumOperands(); ++op) {401if (inst.isIdOperand(op)) {402// In blocks, these are always result ids, but we are relying on403// getTypeId() to return NoType for things like OpLabel.404if (getTypeId(inst.getIdOperand(op)) != NoType)405postProcessType(inst, getTypeId(inst.getIdOperand(op)));406}407}408}409410// comment in header411void Builder::postProcessCFG()412{413// reachableBlocks is the set of blockss reached via control flow, or which are414// unreachable continue targert or unreachable merge.415std::unordered_set<const Block*> reachableBlocks;416std::unordered_map<Block*, Block*> headerForUnreachableContinue;417std::unordered_set<Block*> unreachableMerges;418std::unordered_set<Id> unreachableDefinitions;419// Collect IDs defined in unreachable blocks. For each function, label the420// reachable blocks first. Then for each unreachable block, collect the421// result IDs of the instructions in it.422for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {423Function* f = *fi;424Block* entry = f->getEntryBlock();425inReadableOrder(entry,426[&reachableBlocks, &unreachableMerges, &headerForUnreachableContinue]427(Block* b, ReachReason why, Block* header) {428reachableBlocks.insert(b);429if (why == ReachDeadContinue) headerForUnreachableContinue[b] = header;430if (why == ReachDeadMerge) unreachableMerges.insert(b);431});432for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {433Block* b = *bi;434if (unreachableMerges.count(b) != 0 || headerForUnreachableContinue.count(b) != 0) {435auto ii = b->getInstructions().cbegin();436++ii; // Keep potential decorations on the label.437for (; ii != b->getInstructions().cend(); ++ii)438unreachableDefinitions.insert(ii->get()->getResultId());439} else if (reachableBlocks.count(b) == 0) {440// The normal case for unreachable code. All definitions are considered dead.441for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ++ii)442unreachableDefinitions.insert(ii->get()->getResultId());443}444}445}446447// Modify unreachable merge blocks and unreachable continue targets.448// Delete their contents.449for (auto mergeIter = unreachableMerges.begin(); mergeIter != unreachableMerges.end(); ++mergeIter) {450(*mergeIter)->rewriteAsCanonicalUnreachableMerge();451}452for (auto continueIter = headerForUnreachableContinue.begin();453continueIter != headerForUnreachableContinue.end();454++continueIter) {455Block* continue_target = continueIter->first;456Block* header = continueIter->second;457continue_target->rewriteAsCanonicalUnreachableContinue(header);458}459460// Remove unneeded decorations, for unreachable instructions461for (auto decorationIter = decorations.begin(); decorationIter != decorations.end();) {462Id decorationId = (*decorationIter)->getIdOperand(0);463if (unreachableDefinitions.count(decorationId) != 0) {464decorationIter = decorations.erase(decorationIter);465} else {466++decorationIter;467}468}469}470471// comment in header472void Builder::postProcessFeatures() {473// Add per-instruction capabilities, extensions, etc.,474475// Look for any 8/16 bit type in physical storage buffer class, and set the476// appropriate capability. This happens in createSpvVariable for other storage477// classes, but there isn't always a variable for physical storage buffer.478for (int t = 0; t < (int)groupedTypes[enumCast(Op::OpTypePointer)].size(); ++t) {479Instruction* type = groupedTypes[enumCast(Op::OpTypePointer)][t];480if (type->getImmediateOperand(0) == (unsigned)StorageClass::PhysicalStorageBufferEXT) {481if (containsType(type->getIdOperand(1), Op::OpTypeInt, 8)) {482addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);483addCapability(spv::Capability::StorageBuffer8BitAccess);484}485if (containsType(type->getIdOperand(1), Op::OpTypeInt, 16) ||486containsType(type->getIdOperand(1), Op::OpTypeFloat, 16)) {487addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);488addCapability(spv::Capability::StorageBuffer16BitAccess);489}490}491}492493// process all block-contained instructions494for (auto fi = module.getFunctions().cbegin(); fi != module.getFunctions().cend(); fi++) {495Function* f = *fi;496for (auto bi = f->getBlocks().cbegin(); bi != f->getBlocks().cend(); bi++) {497Block* b = *bi;498for (auto ii = b->getInstructions().cbegin(); ii != b->getInstructions().cend(); ii++)499postProcess(*ii->get());500501// For all local variables that contain pointers to PhysicalStorageBufferEXT, check whether502// there is an existing restrict/aliased decoration. If we don't find one, add Aliased as the503// default.504for (auto vi = b->getLocalVariables().cbegin(); vi != b->getLocalVariables().cend(); vi++) {505const Instruction& inst = *vi->get();506Id resultId = inst.getResultId();507if (containsPhysicalStorageBufferOrArray(getDerefTypeId(resultId))) {508bool foundDecoration = false;509const auto function = [&](const std::unique_ptr<Instruction>& decoration) {510if (decoration.get()->getIdOperand(0) == resultId &&511decoration.get()->getOpCode() == Op::OpDecorate &&512(decoration.get()->getImmediateOperand(1) == spv::Decoration::AliasedPointerEXT ||513decoration.get()->getImmediateOperand(1) == spv::Decoration::RestrictPointerEXT)) {514foundDecoration = true;515}516};517std::for_each(decorations.begin(), decorations.end(), function);518if (!foundDecoration) {519addDecoration(resultId, spv::Decoration::AliasedPointerEXT);520}521}522}523}524}525526// If any Vulkan memory model-specific functionality is used, update the527// OpMemoryModel to match.528if (capabilities.find(spv::Capability::VulkanMemoryModelKHR) != capabilities.end()) {529memoryModel = spv::MemoryModel::VulkanKHR;530addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);531}532533// Add Aliased decoration if there's more than one Workgroup Block variable.534if (capabilities.find(spv::Capability::WorkgroupMemoryExplicitLayoutKHR) != capabilities.end()) {535assert(entryPoints.size() == 1);536auto &ep = entryPoints[0];537538std::vector<Id> workgroup_variables;539for (int i = 0; i < (int)ep->getNumOperands(); i++) {540if (!ep->isIdOperand(i))541continue;542543const Id id = ep->getIdOperand(i);544const Instruction *instr = module.getInstruction(id);545if (instr->getOpCode() != spv::Op::OpVariable)546continue;547548if (instr->getImmediateOperand(0) == spv::StorageClass::Workgroup)549workgroup_variables.push_back(id);550}551552if (workgroup_variables.size() > 1) {553for (size_t i = 0; i < workgroup_variables.size(); i++)554addDecoration(workgroup_variables[i], spv::Decoration::Aliased);555}556}557}558559// SPIR-V requires that any instruction consuming the result of an OpSampledImage560// be in the same block as the OpSampledImage instruction. This pass goes finds561// uses of OpSampledImage where that is not the case and duplicates the562// OpSampledImage to be immediately before the instruction that consumes it.563// The old OpSampledImage is left in place, potentially with no users.564void Builder::postProcessSamplers()565{566// first, find all OpSampledImage instructions and store them in a map.567std::map<Id, Instruction*> sampledImageInstrs;568for (auto f: module.getFunctions()) {569for (auto b: f->getBlocks()) {570for (auto &i: b->getInstructions()) {571if (i->getOpCode() == spv::Op::OpSampledImage) {572sampledImageInstrs[i->getResultId()] = i.get();573}574}575}576}577// next find all uses of the given ids and rewrite them if needed.578for (auto f: module.getFunctions()) {579for (auto b: f->getBlocks()) {580auto &instrs = b->getInstructions();581for (size_t idx = 0; idx < instrs.size(); idx++) {582Instruction *i = instrs[idx].get();583for (int opnum = 0; opnum < i->getNumOperands(); opnum++) {584// Is this operand of the current instruction the result of an OpSampledImage?585if (i->isIdOperand(opnum) &&586sampledImageInstrs.count(i->getIdOperand(opnum)))587{588Instruction *opSampImg = sampledImageInstrs[i->getIdOperand(opnum)];589if (i->getBlock() != opSampImg->getBlock()) {590Instruction *newInstr = new Instruction(getUniqueId(),591opSampImg->getTypeId(),592spv::Op::OpSampledImage);593newInstr->addIdOperand(opSampImg->getIdOperand(0));594newInstr->addIdOperand(opSampImg->getIdOperand(1));595newInstr->setBlock(b);596597// rewrite the user of the OpSampledImage to use the new instruction.598i->setIdOperand(opnum, newInstr->getResultId());599// insert the new OpSampledImage right before the current instruction.600instrs.insert(instrs.begin() + idx,601std::unique_ptr<Instruction>(newInstr));602idx++;603}604}605}606}607}608}609}610611// comment in header612void Builder::postProcess(bool compileOnly)613{614// postProcessCFG needs an entrypoint to determine what is reachable, but if we are not creating an "executable" shader, we don't have an entrypoint615if (!compileOnly)616postProcessCFG();617618postProcessFeatures();619postProcessSamplers();620}621622} // end spv namespace623624625