// Path: blob/21.2-virgl/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
// 4574 views
/*1* Copyright 2011 Christoph Bumiller2* 2014 Red Hat Inc.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice shall be included in12* all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR18* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR20* OTHER DEALINGS IN THE SOFTWARE.21*/2223#include "codegen/nv50_ir_target_gm107.h"24#include "codegen/nv50_ir_lowering_gm107.h"2526namespace nv50_ir {2728Target *getTargetGM107(unsigned int chipset)29{30return new TargetGM107(chipset);31}3233// BULTINS / LIBRARY FUNCTIONS:3435// lazyness -> will just hardcode everything for the time being3637#include "lib/gm107.asm.h"3839void40TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const41{42*code = (const uint32_t *)&gm107_builtin_code[0];43*size = sizeof(gm107_builtin_code);44}4546uint32_t47TargetGM107::getBuiltinOffset(int builtin) const48{49assert(builtin < NVC0_BUILTIN_COUNT);50return gm107_builtin_offsets[builtin];51}5253bool54TargetGM107::isOpSupported(operation op, DataType ty) const55{56switch (op) {57case OP_SAD:58case OP_POW:59case OP_DIV:60case OP_MOD:61return false;62case OP_SQRT:63if (ty == TYPE_F64)64return 
false;65return chipset >= NVISA_GM200_CHIPSET;66case OP_XMAD:67if (isFloatType(ty))68return false;69break;70default:71break;72}7374return true;75}7677// Return true when an instruction supports the reuse flag. When supported, the78// hardware will use the operand reuse cache introduced since Maxwell, which79// should try to reduce bank conflicts by caching values for the subsequent80// instructions. Note that the next instructions have to use the same GPR id in81// the same operand slot.82bool83TargetGM107::isReuseSupported(const Instruction *insn) const84{85const OpClass cl = getOpClass(insn->op);8687// TODO: double-check!88switch (cl) {89case OPCLASS_ARITH:90case OPCLASS_COMPARE:91case OPCLASS_LOGIC:92case OPCLASS_MOVE:93case OPCLASS_SHIFT:94return true;95case OPCLASS_BITFIELD:96if (insn->op == OP_INSBF || insn->op == OP_EXTBF)97return true;98break;99default:100break;101}102return false;103}104105// Return true when an instruction requires to set up a barrier because it106// doesn't operate at a fixed latency. 
Variable latency instructions are memory107// operations, double precision operations, special function unit operations108// and other low throughput instructions.109bool110TargetGM107::isBarrierRequired(const Instruction *insn) const111{112const OpClass cl = getOpClass(insn->op);113114if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)115return true;116117switch (cl) {118case OPCLASS_ATOMIC:119case OPCLASS_LOAD:120case OPCLASS_STORE:121case OPCLASS_SURFACE:122case OPCLASS_TEXTURE:123return true;124case OPCLASS_SFU:125switch (insn->op) {126case OP_COS:127case OP_EX2:128case OP_LG2:129case OP_LINTERP:130case OP_PINTERP:131case OP_RCP:132case OP_RSQ:133case OP_SIN:134case OP_SQRT:135return true;136default:137break;138}139break;140case OPCLASS_BITFIELD:141switch (insn->op) {142case OP_BFIND:143case OP_POPCNT:144return true;145default:146break;147}148break;149case OPCLASS_CONTROL:150switch (insn->op) {151case OP_EMIT:152case OP_RESTART:153return true;154default:155break;156}157break;158case OPCLASS_OTHER:159switch (insn->op) {160case OP_AFETCH:161case OP_PFETCH:162case OP_PIXLD:163case OP_SHFL:164return true;165case OP_RDSV:166return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);167default:168break;169}170break;171case OPCLASS_ARITH:172if ((insn->op == OP_MUL || insn->op == OP_MAD) &&173!isFloatType(insn->dType))174return true;175break;176case OPCLASS_CONVERT:177if (insn->def(0).getFile() != FILE_PREDICATE &&178insn->src(0).getFile() != FILE_PREDICATE)179return true;180break;181default:182break;183}184return false;185}186187bool188TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const189{190// TODO191return false;192}193194// Return the number of stall counts needed to complete a single instruction.195// On Maxwell GPUs, the pipeline depth is 6, but some instructions require196// different number of stall counts like memory operations.197int198TargetGM107::getLatency(const Instruction *insn) const199{200// TODO: better values! 
This should be good enough for now though.201switch (insn->op) {202case OP_EMIT:203case OP_EXPORT:204case OP_PIXLD:205case OP_RESTART:206case OP_STORE:207case OP_SUSTB:208case OP_SUSTP:209return 1;210case OP_SHFL:211return 2;212case OP_ADD:213case OP_AND:214case OP_EXTBF:215case OP_FMA:216case OP_INSBF:217case OP_MAD:218case OP_MAX:219case OP_MIN:220case OP_MOV:221case OP_MUL:222case OP_NOT:223case OP_OR:224case OP_PREEX2:225case OP_PRESIN:226case OP_QUADOP:227case OP_SELP:228case OP_SET:229case OP_SET_AND:230case OP_SET_OR:231case OP_SET_XOR:232case OP_SHL:233case OP_SHLADD:234case OP_SHR:235case OP_SLCT:236case OP_SUB:237case OP_VOTE:238case OP_XOR:239case OP_XMAD:240if (insn->dType != TYPE_F64)241return 6;242break;243case OP_RDSV:244return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;245case OP_ABS:246case OP_CEIL:247case OP_CVT:248case OP_FLOOR:249case OP_NEG:250case OP_SAT:251case OP_TRUNC:252if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||253insn->src(0).getFile() == FILE_PREDICATE))254return 6;255break;256case OP_BFIND:257case OP_COS:258case OP_EX2:259case OP_LG2:260case OP_POPCNT:261case OP_QUADON:262case OP_QUADPOP:263case OP_RCP:264case OP_RSQ:265case OP_SIN:266case OP_SQRT:267return 13;268default:269break;270}271// Use the maximum number of stall counts for other instructions.272return 15;273}274275// Return the operand read latency which is the number of stall counts before276// an instruction can read its sources. 
For memory operations like ATOM, LOAD277// and STORE, the memory access has to be indirect.278int279TargetGM107::getReadLatency(const Instruction *insn) const280{281switch (insn->op) {282case OP_ABS:283case OP_BFIND:284case OP_CEIL:285case OP_COS:286case OP_EX2:287case OP_FLOOR:288case OP_LG2:289case OP_NEG:290case OP_POPCNT:291case OP_RCP:292case OP_RSQ:293case OP_SAT:294case OP_SIN:295case OP_SQRT:296case OP_SULDB:297case OP_SULDP:298case OP_SUREDB:299case OP_SUREDP:300case OP_SUSTB:301case OP_SUSTP:302case OP_TRUNC:303return 4;304case OP_CVT:305if (insn->def(0).getFile() != FILE_PREDICATE &&306insn->src(0).getFile() != FILE_PREDICATE)307return 4;308break;309case OP_ATOM:310case OP_LOAD:311case OP_STORE:312if (insn->src(0).isIndirect(0)) {313switch (insn->src(0).getFile()) {314case FILE_MEMORY_SHARED:315case FILE_MEMORY_CONST:316return 2;317case FILE_MEMORY_GLOBAL:318case FILE_MEMORY_LOCAL:319return 4;320default:321break;322}323}324break;325case OP_EXPORT:326case OP_PFETCH:327case OP_SHFL:328case OP_VFETCH:329return 2;330default:331break;332}333return 0;334}335336bool337TargetGM107::isCS2RSV(SVSemantic sv) const338{339return sv == SV_CLOCK;340}341342bool343TargetGM107::runLegalizePass(Program *prog, CGStage stage) const344{345if (stage == CG_STAGE_PRE_SSA) {346GM107LoweringPass pass(prog);347return pass.run(prog, false, true);348} else349if (stage == CG_STAGE_POST_RA) {350NVC0LegalizePostRA pass(prog);351return pass.run(prog, false, true);352} else353if (stage == CG_STAGE_SSA) {354GM107LegalizeSSA pass;355return pass.run(prog, false, true);356}357return false;358}359360CodeEmitter *361TargetGM107::getCodeEmitter(Program::Type type)362{363return createCodeEmitterGM107(type);364}365366} // namespace nv50_ir367368369