Path: blob/21.2-virgl/src/intel/compiler/brw_eu_validate.c
4550 views
/*1* Copyright © 2015-2019 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/** @file brw_eu_validate.c24*25* This file implements a pass that validates shader assembly.26*27* The restrictions implemented herein are intended to verify that instructions28* in shader assembly do not violate restrictions documented in the graphics29* programming reference manuals.30*31* The restrictions are difficult for humans to quickly verify due to their32* complexity and abundance.33*34* It is critical that this code is thoroughly unit tested because false35* results will lead developers astray, which is worse than having no validator36* at all. Functional changes to this file without corresponding unit tests (in37* test_eu_validate.cpp) will be rejected.38*/3940#include <stdlib.h>41#include "brw_eu.h"4243/* We're going to do lots of string concatenation, so this should help. 
*/44struct string {45char *str;46size_t len;47};4849static void50cat(struct string *dest, const struct string src)51{52dest->str = realloc(dest->str, dest->len + src.len + 1);53memcpy(dest->str + dest->len, src.str, src.len);54dest->str[dest->len + src.len] = '\0';55dest->len = dest->len + src.len;56}57#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})5859static bool60contains(const struct string haystack, const struct string needle)61{62return haystack.str && memmem(haystack.str, haystack.len,63needle.str, needle.len) != NULL;64}65#define CONTAINS(haystack, needle) \66contains(haystack, (struct string){needle, strlen(needle)})6768#define error(str) "\tERROR: " str "\n"69#define ERROR_INDENT "\t "7071#define ERROR(msg) ERROR_IF(true, msg)72#define ERROR_IF(cond, msg) \73do { \74if ((cond) && !CONTAINS(error_msg, error(msg))) { \75CAT(error_msg, error(msg)); \76} \77} while(0)7879#define CHECK(func, args...) \80do { \81struct string __msg = func(devinfo, inst, ##args); \82if (__msg.str) { \83cat(&error_msg, __msg); \84free(__msg.str); \85} \86} while (0)8788#define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0)89#define WIDTH(width) (1 << (width))9091static bool92inst_is_send(const struct intel_device_info *devinfo, const brw_inst *inst)93{94switch (brw_inst_opcode(devinfo, inst)) {95case BRW_OPCODE_SEND:96case BRW_OPCODE_SENDC:97case BRW_OPCODE_SENDS:98case BRW_OPCODE_SENDSC:99return true;100default:101return false;102}103}104105static bool106inst_is_split_send(const struct intel_device_info *devinfo,107const brw_inst *inst)108{109if (devinfo->ver >= 12) {110return inst_is_send(devinfo, inst);111} else {112switch (brw_inst_opcode(devinfo, inst)) {113case BRW_OPCODE_SENDS:114case BRW_OPCODE_SENDSC:115return true;116default:117return false;118}119}120}121122static unsigned123signed_type(unsigned type)124{125switch (type) {126case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D;127case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W;128case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B;129case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q;130default: return type;131}132}133134static enum brw_reg_type135inst_dst_type(const struct intel_device_info *devinfo, const brw_inst *inst)136{137return (devinfo->ver < 12 || !inst_is_send(devinfo, inst)) ?138brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D;139}140141static bool142inst_is_raw_move(const struct intel_device_info *devinfo, const brw_inst *inst)143{144unsigned dst_type = signed_type(inst_dst_type(devinfo, inst));145unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));146147if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {148/* FIXME: not strictly true */149if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF ||150brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV ||151brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) {152return false;153}154} else if (brw_inst_src0_negate(devinfo, inst) ||155brw_inst_src0_abs(devinfo, inst)) {156return false;157}158159return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV 
&&160brw_inst_saturate(devinfo, inst) == 0 &&161dst_type == src_type;162}163164static bool165dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)166{167return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&168brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;169}170171static bool172src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)173{174return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&175brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&176brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;177}178179static bool180src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)181{182return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&183brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;184}185186static bool187src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)188{189return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&190(brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;191}192193static bool194src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)195{196return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&197(brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;198}199200static bool201src0_has_scalar_region(const struct intel_device_info *devinfo,202const brw_inst *inst)203{204return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&205brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&206brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;207}208209static bool210src1_has_scalar_region(const struct intel_device_info *devinfo,211const brw_inst *inst)212{213return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&214brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&215brw_inst_src1_hstride(devinfo, inst) == 
BRW_HORIZONTAL_STRIDE_0;216}217218static unsigned219num_sources_from_inst(const struct intel_device_info *devinfo,220const brw_inst *inst)221{222const struct opcode_desc *desc =223brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));224unsigned math_function;225226if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {227math_function = brw_inst_math_function(devinfo, inst);228} else if (devinfo->ver < 6 &&229brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {230if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {231/* src1 must be a descriptor (including the information to determine232* that the SEND is doing an extended math operation), but src0 can233* actually be null since it serves as the source of the implicit GRF234* to MRF move.235*236* If we stop using that functionality, we'll have to revisit this.237*/238return 2;239} else {240/* Send instructions are allowed to have null sources since they use241* the base_mrf field to specify which message register source.242*/243return 0;244}245} else {246assert(desc->nsrc < 4);247return desc->nsrc;248}249250switch (math_function) {251case BRW_MATH_FUNCTION_INV:252case BRW_MATH_FUNCTION_LOG:253case BRW_MATH_FUNCTION_EXP:254case BRW_MATH_FUNCTION_SQRT:255case BRW_MATH_FUNCTION_RSQ:256case BRW_MATH_FUNCTION_SIN:257case BRW_MATH_FUNCTION_COS:258case BRW_MATH_FUNCTION_SINCOS:259case GFX8_MATH_FUNCTION_INVM:260case GFX8_MATH_FUNCTION_RSQRTM:261return 1;262case BRW_MATH_FUNCTION_FDIV:263case BRW_MATH_FUNCTION_POW:264case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:265case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:266case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:267return 2;268default:269unreachable("not reached");270}271}272273static struct string274invalid_values(const struct intel_device_info *devinfo, const brw_inst *inst)275{276unsigned num_sources = num_sources_from_inst(devinfo, inst);277struct string error_msg = { .str = NULL, .len = 0 };278279switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) 
{280case BRW_EXECUTE_1:281case BRW_EXECUTE_2:282case BRW_EXECUTE_4:283case BRW_EXECUTE_8:284case BRW_EXECUTE_16:285case BRW_EXECUTE_32:286break;287default:288ERROR("invalid execution size");289break;290}291292if (inst_is_send(devinfo, inst))293return error_msg;294295if (num_sources == 3) {296/* Nothing to test:297* No 3-src instructions on Gfx4-5298* No reg file bits on Gfx6-10 (align16)299* No invalid encodings on Gfx10-12 (align1)300*/301} else {302if (devinfo->ver > 6) {303ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF ||304(num_sources > 0 &&305brw_inst_src0_reg_file(devinfo, inst) == MRF) ||306(num_sources > 1 &&307brw_inst_src1_reg_file(devinfo, inst) == MRF),308"invalid register file encoding");309}310}311312if (error_msg.str)313return error_msg;314315if (num_sources == 3) {316if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {317if (devinfo->ver >= 10) {318ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE ||319brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE ||320brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE ||321brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE,322"invalid register type encoding");323} else {324ERROR("Align1 mode not allowed on Gen < 10");325}326} else {327ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||328brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,329"invalid register type encoding");330}331} else {332ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||333(num_sources > 0 &&334brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||335(num_sources > 1 &&336brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),337"invalid register type encoding");338}339340return error_msg;341}342343static struct string344sources_not_null(const struct intel_device_info *devinfo,345const brw_inst *inst)346{347unsigned num_sources = num_sources_from_inst(devinfo, inst);348struct string error_msg = { .str = NULL, .len = 0 
};349350/* Nothing to test. 3-src instructions can only have GRF sources, and351* there's no bit to control the file.352*/353if (num_sources == 3)354return (struct string){};355356/* Nothing to test. Split sends can only encode a file in sources that are357* allowed to be NULL.358*/359if (inst_is_split_send(devinfo, inst))360return (struct string){};361362if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC)363ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");364365if (num_sources == 2)366ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");367368return error_msg;369}370371static struct string372alignment_supported(const struct intel_device_info *devinfo,373const brw_inst *inst)374{375struct string error_msg = { .str = NULL, .len = 0 };376377ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,378"Align16 not supported");379380return error_msg;381}382383static bool384inst_uses_src_acc(const struct intel_device_info *devinfo, const brw_inst *inst)385{386/* Check instructions that use implicit accumulator sources */387switch (brw_inst_opcode(devinfo, inst)) {388case BRW_OPCODE_MAC:389case BRW_OPCODE_MACH:390case BRW_OPCODE_SADA2:391return true;392default:393break;394}395396/* FIXME: support 3-src instructions */397unsigned num_sources = num_sources_from_inst(devinfo, inst);398assert(num_sources < 3);399400return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));401}402403static struct string404send_restrictions(const struct intel_device_info *devinfo,405const brw_inst *inst)406{407struct string error_msg = { .str = NULL, .len = 0 };408409if (inst_is_split_send(devinfo, inst)) {410ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&411brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,412"src1 of split send must be a GRF or NULL");413414ERROR_IF(brw_inst_eot(devinfo, inst) &&415brw_inst_src0_da_reg_nr(devinfo, inst) < 112,416"send with EOT must 
use g112-g127");417ERROR_IF(brw_inst_eot(devinfo, inst) &&418brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&419brw_inst_send_src1_reg_nr(devinfo, inst) < 112,420"send with EOT must use g112-g127");421422if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {423/* Assume minimums if we don't know */424unsigned mlen = 1;425if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {426const uint32_t desc = brw_inst_send_desc(devinfo, inst);427mlen = brw_message_desc_mlen(devinfo, desc);428}429430unsigned ex_mlen = 1;431if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {432const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);433ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);434}435const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);436const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);437ERROR_IF((src0_reg_nr <= src1_reg_nr &&438src1_reg_nr < src0_reg_nr + mlen) ||439(src1_reg_nr <= src0_reg_nr &&440src0_reg_nr < src1_reg_nr + ex_mlen),441"split send payloads must not overlap");442}443} else if (inst_is_send(devinfo, inst)) {444ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,445"send must use direct addressing");446447if (devinfo->ver >= 7) {448ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE,449"send from non-GRF");450ERROR_IF(brw_inst_eot(devinfo, inst) &&451brw_inst_src0_da_reg_nr(devinfo, inst) < 112,452"send with EOT must use g112-g127");453}454455if (devinfo->ver >= 8) {456ERROR_IF(!dst_is_null(devinfo, inst) &&457(brw_inst_dst_da_reg_nr(devinfo, inst) +458brw_inst_rlen(devinfo, inst) > 127) &&459(brw_inst_src0_da_reg_nr(devinfo, inst) +460brw_inst_mlen(devinfo, inst) >461brw_inst_dst_da_reg_nr(devinfo, inst)),462"r127 must not be used for return address when there is "463"a src and dest overlap");464}465}466467return error_msg;468}469470static bool471is_unsupported_inst(const struct intel_device_info 
*devinfo,472const brw_inst *inst)473{474return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL;475}476477/**478* Returns whether a combination of two types would qualify as mixed float479* operation mode480*/481static inline bool482types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)483{484return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) ||485(t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF);486}487488static enum brw_reg_type489execution_type_for_type(enum brw_reg_type type)490{491switch (type) {492case BRW_REGISTER_TYPE_NF:493case BRW_REGISTER_TYPE_DF:494case BRW_REGISTER_TYPE_F:495case BRW_REGISTER_TYPE_HF:496return type;497498case BRW_REGISTER_TYPE_VF:499return BRW_REGISTER_TYPE_F;500501case BRW_REGISTER_TYPE_Q:502case BRW_REGISTER_TYPE_UQ:503return BRW_REGISTER_TYPE_Q;504505case BRW_REGISTER_TYPE_D:506case BRW_REGISTER_TYPE_UD:507return BRW_REGISTER_TYPE_D;508509case BRW_REGISTER_TYPE_W:510case BRW_REGISTER_TYPE_UW:511case BRW_REGISTER_TYPE_B:512case BRW_REGISTER_TYPE_UB:513case BRW_REGISTER_TYPE_V:514case BRW_REGISTER_TYPE_UV:515return BRW_REGISTER_TYPE_W;516}517unreachable("not reached");518}519520/**521* Returns the execution type of an instruction \p inst522*/523static enum brw_reg_type524execution_type(const struct intel_device_info *devinfo, const brw_inst *inst)525{526unsigned num_sources = num_sources_from_inst(devinfo, inst);527enum brw_reg_type src0_exec_type, src1_exec_type;528529/* Execution data type is independent of destination data type, except in530* mixed F/HF instructions.531*/532enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst);533534src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));535if (num_sources == 1) {536if (src0_exec_type == BRW_REGISTER_TYPE_HF)537return dst_exec_type;538return src0_exec_type;539}540541src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));542if (types_are_mixed_float(src0_exec_type, src1_exec_type) 
||543types_are_mixed_float(src0_exec_type, dst_exec_type) ||544types_are_mixed_float(src1_exec_type, dst_exec_type)) {545return BRW_REGISTER_TYPE_F;546}547548if (src0_exec_type == src1_exec_type)549return src0_exec_type;550551if (src0_exec_type == BRW_REGISTER_TYPE_NF ||552src1_exec_type == BRW_REGISTER_TYPE_NF)553return BRW_REGISTER_TYPE_NF;554555/* Mixed operand types where one is float is float on Gen < 6556* (and not allowed on later platforms)557*/558if (devinfo->ver < 6 &&559(src0_exec_type == BRW_REGISTER_TYPE_F ||560src1_exec_type == BRW_REGISTER_TYPE_F))561return BRW_REGISTER_TYPE_F;562563if (src0_exec_type == BRW_REGISTER_TYPE_Q ||564src1_exec_type == BRW_REGISTER_TYPE_Q)565return BRW_REGISTER_TYPE_Q;566567if (src0_exec_type == BRW_REGISTER_TYPE_D ||568src1_exec_type == BRW_REGISTER_TYPE_D)569return BRW_REGISTER_TYPE_D;570571if (src0_exec_type == BRW_REGISTER_TYPE_W ||572src1_exec_type == BRW_REGISTER_TYPE_W)573return BRW_REGISTER_TYPE_W;574575if (src0_exec_type == BRW_REGISTER_TYPE_DF ||576src1_exec_type == BRW_REGISTER_TYPE_DF)577return BRW_REGISTER_TYPE_DF;578579unreachable("not reached");580}581582/**583* Returns whether a region is packed584*585* A region is packed if its elements are adjacent in memory, with no586* intervening space, no overlap, and no replicated values.587*/588static bool589is_packed(unsigned vstride, unsigned width, unsigned hstride)590{591if (vstride == width) {592if (vstride == 1) {593return hstride == 0;594} else {595return hstride == 1;596}597}598599return false;600}601602/**603* Returns whether an instruction is an explicit or implicit conversion604* to/from half-float.605*/606static bool607is_half_float_conversion(const struct intel_device_info *devinfo,608const brw_inst *inst)609{610enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);611612unsigned num_sources = num_sources_from_inst(devinfo, inst);613enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);614615if (dst_type != src0_type &&616(dst_type == 
BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {617return true;618} else if (num_sources > 1) {619enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);620return dst_type != src1_type &&621(dst_type == BRW_REGISTER_TYPE_HF ||622src1_type == BRW_REGISTER_TYPE_HF);623}624625return false;626}627628/*629* Returns whether an instruction is using mixed float operation mode630*/631static bool632is_mixed_float(const struct intel_device_info *devinfo, const brw_inst *inst)633{634if (devinfo->ver < 8)635return false;636637if (inst_is_send(devinfo, inst))638return false;639640unsigned opcode = brw_inst_opcode(devinfo, inst);641const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);642if (desc->ndst == 0)643return false;644645/* FIXME: support 3-src instructions */646unsigned num_sources = num_sources_from_inst(devinfo, inst);647assert(num_sources < 3);648649enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);650enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);651652if (num_sources == 1)653return types_are_mixed_float(src0_type, dst_type);654655enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);656657return types_are_mixed_float(src0_type, src1_type) ||658types_are_mixed_float(src0_type, dst_type) ||659types_are_mixed_float(src1_type, dst_type);660}661662/**663* Returns whether an instruction is an explicit or implicit conversion664* to/from byte.665*/666static bool667is_byte_conversion(const struct intel_device_info *devinfo,668const brw_inst *inst)669{670enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);671672unsigned num_sources = num_sources_from_inst(devinfo, inst);673enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);674675if (dst_type != src0_type &&676(type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {677return true;678} else if (num_sources > 1) {679enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);680return dst_type != src1_type &&681(type_sz(dst_type) == 
1 || type_sz(src1_type) == 1);682}683684return false;685}686687/**688* Checks restrictions listed in "General Restrictions Based on Operand Types"689* in the "Register Region Restrictions" section.690*/691static struct string692general_restrictions_based_on_operand_types(const struct intel_device_info *devinfo,693const brw_inst *inst)694{695const struct opcode_desc *desc =696brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));697unsigned num_sources = num_sources_from_inst(devinfo, inst);698unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);699struct string error_msg = { .str = NULL, .len = 0 };700701if (inst_is_send(devinfo, inst))702return error_msg;703704if (devinfo->ver >= 11) {705if (num_sources == 3) {706ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||707brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,708"Byte data type is not supported for src1/2 register regioning. This includes "709"byte broadcast as well.");710}711if (num_sources == 2) {712ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,713"Byte data type is not supported for src1 register regioning. 
This includes "714"byte broadcast as well.");715}716}717718if (num_sources == 3)719return error_msg;720721if (exec_size == 1)722return error_msg;723724if (desc->ndst == 0)725return error_msg;726727/* The PRMs say:728*729* Where n is the largest element size in bytes for any source or730* destination operand type, ExecSize * n must be <= 64.731*732* But we do not attempt to enforce it, because it is implied by other733* rules:734*735* - that the destination stride must match the execution data type736* - sources may not span more than two adjacent GRF registers737* - destination may not span more than two adjacent GRF registers738*739* In fact, checking it would weaken testing of the other rules.740*/741742unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));743enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);744bool dst_type_is_byte =745inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B ||746inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB;747748if (dst_type_is_byte) {749if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {750if (!inst_is_raw_move(devinfo, inst))751ERROR("Only raw MOV supports a packed-byte destination");752return error_msg;753}754}755756unsigned exec_type = execution_type(devinfo, inst);757unsigned exec_type_size = brw_reg_type_to_size(exec_type);758unsigned dst_type_size = brw_reg_type_to_size(dst_type);759760/* On IVB/BYT, region parameters and execution size for DF are in terms of761* 32-bit elements, so they are doubled. 
For evaluating the validity of an762* instruction, we halve them.763*/764if (devinfo->verx10 == 70 &&765exec_type_size == 8 && dst_type_size == 4)766dst_type_size = 8;767768if (is_byte_conversion(devinfo, inst)) {769/* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:770*771* "There is no direct conversion from B/UB to DF or DF to B/UB.772* There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."773*774* Even if these restrictions are listed for the MOV instruction, we775* validate this more generally, since there is the possibility776* of implicit conversions from other instructions.777*/778enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);779enum brw_reg_type src1_type = num_sources > 1 ?780brw_inst_src1_type(devinfo, inst) : 0;781782ERROR_IF(type_sz(dst_type) == 1 &&783(type_sz(src0_type) == 8 ||784(num_sources > 1 && type_sz(src1_type) == 8)),785"There are no direct conversions between 64-bit types and B/UB");786787ERROR_IF(type_sz(dst_type) == 8 &&788(type_sz(src0_type) == 1 ||789(num_sources > 1 && type_sz(src1_type) == 1)),790"There are no direct conversions between 64-bit types and B/UB");791}792793if (is_half_float_conversion(devinfo, inst)) {794/**795* A helper to validate used in the validation of the following restriction796* from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:797*798* "There is no direct conversion from HF to DF or DF to HF.799* There is no direct conversion from HF to Q/UQ or Q/UQ to HF."800*801* Even if these restrictions are listed for the MOV instruction, we802* validate this more generally, since there is the possibility803* of implicit conversions from other instructions, such us implicit804* conversion from integer to HF with the ADD instruction in SKL+.805*/806enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);807enum brw_reg_type src1_type = num_sources > 1 ?808brw_inst_src1_type(devinfo, inst) : 0;809ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF 
&&810(type_sz(src0_type) == 8 ||811(num_sources > 1 && type_sz(src1_type) == 8)),812"There are no direct conversions between 64-bit types and HF");813814ERROR_IF(type_sz(dst_type) == 8 &&815(src0_type == BRW_REGISTER_TYPE_HF ||816(num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),817"There are no direct conversions between 64-bit types and HF");818819/* From the BDW+ PRM:820*821* "Conversion between Integer and HF (Half Float) must be822* DWord-aligned and strided by a DWord on the destination."823*824* Also, the above restrictions seems to be expanded on CHV and SKL+ by:825*826* "There is a relaxed alignment rule for word destinations. When827* the destination type is word (UW, W, HF), destination data types828* can be aligned to either the lowest word or the second lowest829* word of the execution channel. This means the destination data830* words can be either all in the even word locations or all in the831* odd word locations."832*833* We do not implement the second rule as is though, since empirical834* testing shows inconsistencies:835* - It suggests that packed 16-bit is not allowed, which is not true.836* - It suggests that conversions from Q/DF to W (which need to be837* 64-bit aligned on the destination) are not possible, which is838* not true.839*840* So from this rule we only validate the implication that conversions841* from F to HF need to be DWord strided (except in Align1 mixed842* float mode where packed fp16 destination is allowed so long as the843* destination is oword-aligned).844*845* Finally, we only validate this for Align1 because Align16 always846* requires packed destinations, so these restrictions can't possibly847* apply to Align16 mode.848*/849if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {850if ((dst_type == BRW_REGISTER_TYPE_HF &&851(brw_reg_type_is_integer(src0_type) ||852(num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||853(brw_reg_type_is_integer(dst_type) &&854(src0_type == BRW_REGISTER_TYPE_HF 
||855(num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {856ERROR_IF(dst_stride * dst_type_size != 4,857"Conversions between integer and half-float must be "858"strided by a DWord on the destination");859860unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);861ERROR_IF(subreg % 4 != 0,862"Conversions between integer and half-float must be "863"aligned to a DWord on the destination");864} else if ((devinfo->is_cherryview || devinfo->ver >= 9) &&865dst_type == BRW_REGISTER_TYPE_HF) {866unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);867ERROR_IF(dst_stride != 2 &&868!(is_mixed_float(devinfo, inst) &&869dst_stride == 1 && subreg % 16 == 0),870"Conversions to HF must have either all words in even "871"word locations or all words in odd word locations or "872"be mixed-float with Oword-aligned packed destination");873}874}875}876877/* There are special regioning rules for mixed-float mode in CHV and SKL that878* override the general rule for the ratio of sizes of the destination type879* and the execution type. 
We will add validation for those in a later patch.880*/881bool validate_dst_size_and_exec_size_ratio =882!is_mixed_float(devinfo, inst) ||883!(devinfo->is_cherryview || devinfo->ver >= 9);884885if (validate_dst_size_and_exec_size_ratio &&886exec_type_size > dst_type_size) {887if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {888ERROR_IF(dst_stride * dst_type_size != exec_type_size,889"Destination stride must be equal to the ratio of the sizes "890"of the execution data type to the destination type");891}892893unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);894895if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&896brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {897/* The i965 PRM says:898*899* Implementation Restriction: The relaxed alignment rule for byte900* destination (#10.5) is not supported.901*/902if ((devinfo->ver > 4 || devinfo->is_g4x) && dst_type_is_byte) {903ERROR_IF(subreg % exec_type_size != 0 &&904subreg % exec_type_size != 1,905"Destination subreg must be aligned to the size of the "906"execution data type (or to the next lowest byte for byte "907"destinations)");908} else {909ERROR_IF(subreg % exec_type_size != 0,910"Destination subreg must be aligned to the size of the "911"execution data type");912}913}914}915916return error_msg;917}918919/**920* Checks restrictions listed in "General Restrictions on Regioning Parameters"921* in the "Register Region Restrictions" section.922*/923static struct string924general_restrictions_on_region_parameters(const struct intel_device_info *devinfo,925const brw_inst *inst)926{927const struct opcode_desc *desc =928brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));929unsigned num_sources = num_sources_from_inst(devinfo, inst);930unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);931struct string error_msg = { .str = NULL, .len = 0 };932933if (num_sources == 3)934return (struct string){};935936/* Split sends don't have the bits in the instruction to 
encode regions so937* there's nothing to check.938*/939if (inst_is_split_send(devinfo, inst))940return (struct string){};941942if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {943if (desc->ndst != 0 && !dst_is_null(devinfo, inst))944ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,945"Destination Horizontal Stride must be 1");946947if (num_sources >= 1) {948if (devinfo->verx10 >= 75) {949ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&950brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&951brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&952brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,953"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");954} else {955ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&956brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&957brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,958"In Align16 mode, only VertStride of 0 or 4 is allowed");959}960}961962if (num_sources == 2) {963if (devinfo->verx10 >= 75) {964ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&965brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&966brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&967brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,968"In Align16 mode, only VertStride of 0, 2, or 4 is allowed");969} else {970ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&971brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&972brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,973"In Align16 mode, only VertStride of 0 or 4 is allowed");974}975}976977return error_msg;978}979980for (unsigned i = 0; i < num_sources; i++) {981unsigned vstride, width, hstride, element_size, subreg;982enum brw_reg_type type;983984#define DO_SRC(n) \985if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \986BRW_IMMEDIATE_VALUE) 
\987continue; \988\989vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \990width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \991hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \992type = brw_inst_src ## n ## _type(devinfo, inst); \993element_size = brw_reg_type_to_size(type); \994subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)995996if (i == 0) {997DO_SRC(0);998} else {999DO_SRC(1);1000}1001#undef DO_SRC10021003/* On IVB/BYT, region parameters and execution size for DF are in terms of1004* 32-bit elements, so they are doubled. For evaluating the validity of an1005* instruction, we halve them.1006*/1007if (devinfo->verx10 == 70 &&1008element_size == 8)1009element_size = 4;10101011/* ExecSize must be greater than or equal to Width. */1012ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "1013"to Width");10141015/* If ExecSize = Width and HorzStride ≠ 0,1016* VertStride must be set to Width * HorzStride.1017*/1018if (exec_size == width && hstride != 0) {1019ERROR_IF(vstride != width * hstride,1020"If ExecSize = Width and HorzStride ≠ 0, "1021"VertStride must be set to Width * HorzStride");1022}10231024/* If Width = 1, HorzStride must be 0 regardless of the values of1025* ExecSize and VertStride.1026*/1027if (width == 1) {1028ERROR_IF(hstride != 0,1029"If Width = 1, HorzStride must be 0 regardless "1030"of the values of ExecSize and VertStride");1031}10321033/* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. 
*/1034if (exec_size == 1 && width == 1) {1035ERROR_IF(vstride != 0 || hstride != 0,1036"If ExecSize = Width = 1, both VertStride "1037"and HorzStride must be 0");1038}10391040/* If VertStride = HorzStride = 0, Width must be 1 regardless of the1041* value of ExecSize.1042*/1043if (vstride == 0 && hstride == 0) {1044ERROR_IF(width != 1,1045"If VertStride = HorzStride = 0, Width must be "1046"1 regardless of the value of ExecSize");1047}10481049/* VertStride must be used to cross GRF register boundaries. This rule1050* implies that elements within a 'Width' cannot cross GRF boundaries.1051*/1052const uint64_t mask = (1ULL << element_size) - 1;1053unsigned rowbase = subreg;10541055for (int y = 0; y < exec_size / width; y++) {1056uint64_t access_mask = 0;1057unsigned offset = rowbase;10581059for (int x = 0; x < width; x++) {1060access_mask |= mask << (offset % 64);1061offset += hstride * element_size;1062}10631064rowbase += vstride * element_size;10651066if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {1067ERROR("VertStride must be used to cross GRF register boundaries");1068break;1069}1070}1071}10721073/* Dst.HorzStride must not be 0. 
*/1074if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {1075ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,1076"Destination Horizontal Stride must not be 0");1077}10781079return error_msg;1080}10811082static struct string1083special_restrictions_for_mixed_float_mode(const struct intel_device_info *devinfo,1084const brw_inst *inst)1085{1086struct string error_msg = { .str = NULL, .len = 0 };10871088const unsigned opcode = brw_inst_opcode(devinfo, inst);1089const unsigned num_sources = num_sources_from_inst(devinfo, inst);1090if (num_sources >= 3)1091return error_msg;10921093if (!is_mixed_float(devinfo, inst))1094return error_msg;10951096unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);1097bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;10981099enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);1100enum brw_reg_type src1_type = num_sources > 1 ?1101brw_inst_src1_type(devinfo, inst) : 0;1102enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);11031104unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));1105bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);11061107/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1108* Float Operations:1109*1110* "Indirect addressing on source is not supported when source and1111* destination data types are mixed float."1112*/1113ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||1114(num_sources > 1 &&1115brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),1116"Indirect addressing on source is not supported when source and "1117"destination data types are mixed float");11181119/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1120* Float Operations:1121*1122* "No SIMD16 in mixed mode when destination is f32. 
Instruction1123* execution size must be no more than 8."1124*/1125ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F,1126"Mixed float mode with 32-bit float destination is limited "1127"to SIMD8");11281129if (is_align16) {1130/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1131* Float Operations:1132*1133* "In Align16 mode, when half float and float data types are mixed1134* between source operands OR between source and destination operands,1135* the register content are assumed to be packed."1136*1137* Since Align16 doesn't have a concept of horizontal stride (or width),1138* it means that vertical stride must always be 4, since 0 and 2 would1139* lead to replicated data, and any other value is disallowed in Align16.1140*/1141ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,1142"Align16 mixed float mode assumes packed data (vstride must be 4");11431144ERROR_IF(num_sources >= 2 &&1145brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,1146"Align16 mixed float mode assumes packed data (vstride must be 4");11471148/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1149* Float Operations:1150*1151* "For Align16 mixed mode, both input and output packed f16 data1152* must be oword aligned, no oword crossing in packed f16."1153*1154* The previous rule requires that Align16 operands are always packed,1155* and since there is only one bit for Align16 subnr, which represents1156* offsets 0B and 16B, this rule is always enforced and we don't need to1157* validate it.1158*/11591160/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1161* Float Operations:1162*1163* "No SIMD16 in mixed mode when destination is packed f16 for both1164* Align1 and Align16."1165*1166* And:1167*1168* "In Align16 mode, when half float and float data types are mixed1169* between source operands OR between source and destination operands,1170* the register content are assumed to be packed."1171*1172* Which implies that 
SIMD16 is not available in Align16. This is further1173* confirmed by:1174*1175* "For Align16 mixed mode, both input and output packed f16 data1176* must be oword aligned, no oword crossing in packed f16"1177*1178* Since oword-aligned packed f16 data would cross oword boundaries when1179* the execution size is larger than 8.1180*/1181ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");11821183/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1184* Float Operations:1185*1186* "No accumulator read access for Align16 mixed float."1187*/1188ERROR_IF(inst_uses_src_acc(devinfo, inst),1189"No accumulator read access for Align16 mixed float");1190} else {1191assert(!is_align16);11921193/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1194* Float Operations:1195*1196* "No SIMD16 in mixed mode when destination is packed f16 for both1197* Align1 and Align16."1198*/1199ERROR_IF(exec_size > 8 && dst_is_packed &&1200dst_type == BRW_REGISTER_TYPE_HF,1201"Align1 mixed float mode is limited to SIMD8 when destination "1202"is packed half-float");12031204/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1205* Float Operations:1206*1207* "Math operations for mixed mode:1208* - In Align1, f16 inputs need to be strided"1209*/1210if (opcode == BRW_OPCODE_MATH) {1211if (src0_type == BRW_REGISTER_TYPE_HF) {1212ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,1213"Align1 mixed mode math needs strided half-float inputs");1214}12151216if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) {1217ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,1218"Align1 mixed mode math needs strided half-float inputs");1219}1220}12211222if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) {1223/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1224* Float Operations:1225*1226* "In Align1, destination stride can be smaller than execution1227* type. 
When destination is stride of 1, 16 bit packed data is1228* updated on the destination. However, output packed f16 data1229* must be oword aligned, no oword crossing in packed f16."1230*1231* The requirement of not crossing oword boundaries for 16-bit oword1232* aligned data means that execution size is limited to 8.1233*/1234unsigned subreg;1235if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)1236subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);1237else1238subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);1239ERROR_IF(subreg % 16 != 0,1240"Align1 mixed mode packed half-float output must be "1241"oword aligned");1242ERROR_IF(exec_size > 8,1243"Align1 mixed mode packed half-float output must not "1244"cross oword boundaries (max exec size is 8)");12451246/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1247* Float Operations:1248*1249* "When source is float or half float from accumulator register and1250* destination is half float with a stride of 1, the source must1251* register aligned. 
i.e., source must have offset zero."1252*1253* Align16 mixed float mode doesn't allow accumulator access on sources,1254* so we only need to check this for Align1.1255*/1256if (src0_is_acc(devinfo, inst) &&1257(src0_type == BRW_REGISTER_TYPE_F ||1258src0_type == BRW_REGISTER_TYPE_HF)) {1259ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,1260"Mixed float mode requires register-aligned accumulator "1261"source reads when destination is packed half-float");12621263}12641265if (num_sources > 1 &&1266src1_is_acc(devinfo, inst) &&1267(src1_type == BRW_REGISTER_TYPE_F ||1268src1_type == BRW_REGISTER_TYPE_HF)) {1269ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,1270"Mixed float mode requires register-aligned accumulator "1271"source reads when destination is packed half-float");1272}1273}12741275/* From the SKL PRM, Special Restrictions for Handling Mixed Mode1276* Float Operations:1277*1278* "No swizzle is allowed when an accumulator is used as an implicit1279* source or an explicit source in an instruction. i.e. 
/**
 * Fills \p access_mask with one byte mask per execution channel for an
 * Align1 region.
 *
 * Channels are visited row by row: there are \p exec_size / \p width rows of
 * \p width channels each.  Consecutive rows start \p vstride elements apart
 * and consecutive channels within a row are \p hstride elements apart, the
 * first row starting at byte \p subreg.  Entry N of \p access_mask receives
 * \p element_size consecutive set bits, shifted left by that channel's byte
 * offset taken modulo 64.  Entries past the last channel are left untouched.
 *
 * For example, exec_size = 4, element_size = 4, subreg = 4, vstride = 4,
 * width = 2, hstride = 2 produces
 *
 *    access_mask[0] = 0x00000000000000F0
 *    access_mask[1] = 0x000000000000F000
 *    access_mask[2] = 0x0000000000F00000
 *    access_mask[3] = 0x00000000F0000000
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   const uint64_t element_bytes = (1ULL << element_size) - 1;
   const unsigned num_rows = exec_size / width;
   const unsigned row_pitch = vstride * element_size;
   const unsigned column_pitch = hstride * element_size;
   unsigned channel = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      const unsigned row_start = subreg + row * row_pitch;

      for (unsigned column = 0; column < width; column++) {
         const unsigned byte_offset = row_start + column * column_pitch;

         access_mask[channel] = element_bytes << (byte_offset % 64);
         channel++;
      }
   }

   /* Either no row fit at all or every channel got exactly one entry. */
   assert(channel == 0 || channel == exec_size);
}
/**
 * Summarizes a per-channel \p access_mask array (32 entries) as a register
 * count.
 *
 * \return 2 as soon as any entry has a bit set above bit 31, otherwise 1 if
 *         at least one entry is non-zero, otherwise 0.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   int any_access = 0;

   for (unsigned channel = 0; channel < 32; channel++) {
      const uint64_t bytes = access_mask[channel];

      /* Anything in the upper 32 bits settles the answer immediately. */
      if ((bytes >> 32) != 0)
         return 2;

      if (bytes != 0)
         any_access = 1;
   }

   return any_access;
}
/**
 * Checks restrictions listed in "Region Alignment Rules" in the "Register
 * Region Restrictions" section.
 *
 * Three-source instructions, Align16 instructions and sends are not
 * examined.  For everything else the function first verifies that each
 * directly-addressed, non-immediate source and the destination stay within
 * two adjacent registers, then builds per-channel byte masks with
 * align1_access_mask() and uses them for the generation-specific rules
 * quoted below.
 *
 * \return the accumulated error text (str == NULL when nothing was found).
 */
static struct string
region_alignment_rules(const struct intel_device_info *devinfo,
                       const brw_inst *inst)
{
   const struct opcode_desc *desc =
      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
   unsigned num_sources = num_sources_from_inst(devinfo, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3)
      return (struct string){};

   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   if (inst_is_send(devinfo, inst))
      return (struct string){};

   /* Sources skipped by the loop below (indirect or immediate) keep an
    * all-zero mask, which registers_read() reports as 0 registers.
    */
   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

#define DO_SRC(n)                                                              \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
          BRW_ADDRESS_DIRECT)                                                  \
         continue;                                                             \
                                                                               \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
          BRW_IMMEDIATE_VALUE)                                                 \
         continue;                                                             \
                                                                               \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
      element_size = brw_reg_type_to_size(type);                               \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      align1_access_mask(src ## n ## _access_mask,                             \
                         exec_size, element_size, subreg,                      \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* Byte offset of the start of the last element read by the region. */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining rules all concern the destination. */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* Stop here if anything has been reported so far; the checks below are
    * only run on instructions that passed the span checks.
    */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->verx10 == 70 &&
       element_size == 8)
      element_size = 4;

   /* The destination is described as a single row of exec_size elements
    * (or as a scalar region when exec_size is 1).
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->ver <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         /* A mask above 0xFFFF has bytes at offset 16 or higher, i.e. in
          * the upper OWord.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL or other Gens
    * after SKL.
    */
   if (devinfo->ver <= 8 ||
       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         /* A mask above 0xFFFFFFFF has bytes at offset 32 or higher, i.e.
          * in the second register.
          */
         for (unsigned i = 0; i < exec_size; i++) {
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Note that the loops inside DO_SRC declare their own `i`, which
          * shadows the source index of the enclosing loop.
          */
#define DO_SRC(n)                                                             \
         if (src ## n ## _regs <= 1)                                          \
            continue;                                                         \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
               ERROR("Each destination register must be entirely derived "   \
                     "from one source register");                             \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         unsigned offset_0 =                                                  \
            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
         unsigned offset_1 = offset_0;                                        \
                                                                              \
         for (unsigned i = 0; i < exec_size; i++) {                           \
            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
               break;                                                         \
            }                                                                 \
         }                                                                    \
                                                                              \
         ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
                  "The offset from the two source registers "                 \
                  "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented by the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gfx6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
#define DO_SRC(n)                                                                  \
         unsigned vstride, width, hstride;                                         \
         vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));           \
         width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));                \
         hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));           \
         bool src ## n ## _is_packed_word =                                        \
            is_packed(vstride, width, hstride) &&                                  \
            (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W ||   \
             brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW);   \
                                                                                   \
         ERROR_IF(src ## n ## _regs == 1 &&                                        \
                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
                  "When the destination spans two registers, the source must "     \
                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
                  "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}
num_sources = num_sources_from_inst(devinfo, inst);1682struct string error_msg = { .str = NULL, .len = 0 };16831684if (num_sources == 3 || num_sources == 0)1685return (struct string){};16861687unsigned file = num_sources == 1 ?1688brw_inst_src0_reg_file(devinfo, inst) :1689brw_inst_src1_reg_file(devinfo, inst);1690if (file != BRW_IMMEDIATE_VALUE)1691return (struct string){};16921693enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);1694unsigned dst_type_size = brw_reg_type_to_size(dst_type);1695unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?1696brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;1697unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));1698enum brw_reg_type type = num_sources == 1 ?1699brw_inst_src0_type(devinfo, inst) :1700brw_inst_src1_type(devinfo, inst);17011702/* The PRMs say:1703*1704* When an immediate vector is used in an instruction, the destination1705* must be 128-bit aligned with destination horizontal stride equivalent1706* to a word for an immediate integer vector (v) and equivalent to a1707* DWord for an immediate float vector (vf).1708*1709* The text has not been updated for the addition of the immediate unsigned1710* integer vector type (uv) on SNB, but presumably the same restriction1711* applies.1712*/1713switch (type) {1714case BRW_REGISTER_TYPE_V:1715case BRW_REGISTER_TYPE_UV:1716case BRW_REGISTER_TYPE_VF:1717ERROR_IF(dst_subreg % (128 / 8) != 0,1718"Destination must be 128-bit aligned in order to use immediate "1719"vector types");17201721if (type == BRW_REGISTER_TYPE_VF) {1722ERROR_IF(dst_type_size * dst_stride != 4,1723"Destination must have stride equivalent to dword in order "1724"to use the VF type");1725} else {1726ERROR_IF(dst_type_size * dst_stride != 2,1727"Destination must have stride equivalent to word in order "1728"to use the V or UV type");1729}1730break;1731default:1732break;1733}17341735return error_msg;1736}17371738static struct 
string1739special_requirements_for_handling_double_precision_data_types(1740const struct intel_device_info *devinfo,1741const brw_inst *inst)1742{1743unsigned num_sources = num_sources_from_inst(devinfo, inst);1744struct string error_msg = { .str = NULL, .len = 0 };17451746if (num_sources == 3 || num_sources == 0)1747return (struct string){};17481749/* Split sends don't have types so there's no doubles there. */1750if (inst_is_split_send(devinfo, inst))1751return (struct string){};17521753enum brw_reg_type exec_type = execution_type(devinfo, inst);1754unsigned exec_type_size = brw_reg_type_to_size(exec_type);17551756enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);1757enum brw_reg_type dst_type = inst_dst_type(devinfo, inst);1758unsigned dst_type_size = brw_reg_type_to_size(dst_type);1759unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));1760unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);1761unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);1762unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);17631764bool is_integer_dword_multiply =1765devinfo->ver >= 8 &&1766brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&1767(brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||1768brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&1769(brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||1770brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);17711772const bool is_double_precision =1773dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;17741775for (unsigned i = 0; i < num_sources; i++) {1776unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;1777bool is_scalar_region;1778enum brw_reg_file file;1779enum brw_reg_type type;17801781#define DO_SRC(n) \1782if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \1783BRW_IMMEDIATE_VALUE) \1784continue; \1785\1786is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \1787vstride = 
STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \1788width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \1789hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \1790file = brw_inst_src ## n ## _reg_file(devinfo, inst); \1791type = brw_inst_src ## n ## _type(devinfo, inst); \1792type_size = brw_reg_type_to_size(type); \1793reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \1794subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \1795address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)17961797if (i == 0) {1798DO_SRC(0);1799} else {1800DO_SRC(1);1801}1802#undef DO_SRC18031804const unsigned src_stride = hstride * type_size;1805const unsigned dst_stride = dst_hstride * dst_type_size;18061807/* The PRMs say that for CHV, BXT:1808*1809* When source or destination datatype is 64b or operation is integer1810* DWord multiply, regioning in Align1 must follow these rules:1811*1812* 1. Source and Destination horizontal stride must be aligned to the1813* same qword.1814* 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.1815* 3. 
Source and Destination offset must be the same, except the case1816* of scalar source.1817*1818* We assume that the restriction applies to GLK as well.1819*/1820if (is_double_precision &&1821brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&1822(devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {1823ERROR_IF(!is_scalar_region &&1824(src_stride % 8 != 0 ||1825dst_stride % 8 != 0 ||1826src_stride != dst_stride),1827"Source and destination horizontal stride must equal and a "1828"multiple of a qword when the execution type is 64-bit");18291830ERROR_IF(vstride != width * hstride,1831"Vstride must be Width * Hstride when the execution type is "1832"64-bit");18331834ERROR_IF(!is_scalar_region && dst_subreg != subreg,1835"Source and destination offset must be the same when the "1836"execution type is 64-bit");1837}18381839/* The PRMs say that for CHV, BXT:1840*1841* When source or destination datatype is 64b or operation is integer1842* DWord multiply, indirect addressing must not be used.1843*1844* We assume that the restriction applies to GLK as well.1845*/1846if (is_double_precision &&1847(devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {1848ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||1849BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,1850"Indirect addressing is not allowed when the execution type "1851"is 64-bit");1852}18531854/* The PRMs say that for CHV, BXT:1855*1856* ARF registers must never be used with 64b datatype or when1857* operation is integer DWord multiply.1858*1859* We assume that the restriction applies to GLK as well.1860*1861* We assume that the restriction does not apply to the null register.1862*/1863if (is_double_precision &&1864(devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {1865ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||1866brw_inst_acc_wr_control(devinfo, inst) ||1867(BRW_ARCHITECTURE_REGISTER_FILE == file &&1868reg != BRW_ARF_NULL) 
||1869(BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&1870dst_reg != BRW_ARF_NULL),1871"Architecture registers cannot be used when the execution "1872"type is 64-bit");1873}18741875/* From the hardware spec section "Register Region Restrictions":1876*1877* "In case where source or destination datatype is 64b or operation is1878* integer DWord multiply [or in case where a floating point data type1879* is used as destination]:1880*1881* 1. Register Regioning patterns where register data bit locations1882* are changed between source and destination are not supported on1883* Src0 and Src1 except for broadcast of a scalar.1884*1885* 2. Explicit ARF registers except null and accumulator must not be1886* used."1887*/1888if (devinfo->verx10 >= 125 &&1889(brw_reg_type_is_floating_point(dst_type) ||1890is_double_precision)) {1891ERROR_IF(!is_scalar_region &&1892(vstride != width * hstride ||1893src_stride != dst_stride ||1894subreg != dst_subreg),1895"Register Regioning patterns where register data bit "1896"locations are changed between source and destination are not "1897"supported except for broadcast of a scalar.");18981899ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE &&1900reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||1901(dst_file == BRW_ARCHITECTURE_REGISTER_FILE &&1902dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR),1903"Explicit ARF registers except null and accumulator must not "1904"be used.");1905}19061907/* From the hardware spec section "Register Region Restrictions":1908*1909* "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and1910* Quad-Word data must not be used."1911*/1912if (devinfo->verx10 >= 125 &&1913(brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) {1914ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&1915vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,1916"Vx1 and VxH indirect addressing for Float, Half-Float, "1917"Double-Float and Quad-Word data must not be 
used");1918}1919}19201921/* The PRMs say that for BDW, SKL:1922*1923* If Align16 is required for an operation with QW destination and non-QW1924* source datatypes, the execution size cannot exceed 2.1925*1926* We assume that the restriction applies to all Gfx8+ parts.1927*/1928if (is_double_precision && devinfo->ver >= 8) {1929enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);1930enum brw_reg_type src1_type =1931num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;1932unsigned src0_type_size = brw_reg_type_to_size(src0_type);1933unsigned src1_type_size = brw_reg_type_to_size(src1_type);19341935ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&1936dst_type_size == 8 &&1937(src0_type_size != 8 || src1_type_size != 8) &&1938brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,1939"In Align16 exec size cannot exceed 2 with a QWord destination "1940"and a non-QWord source");1941}19421943/* The PRMs say that for CHV, BXT:1944*1945* When source or destination datatype is 64b or operation is integer1946* DWord multiply, DepCtrl must not be used.1947*1948* We assume that the restriction applies to GLK as well.1949*/1950if (is_double_precision &&1951(devinfo->is_cherryview || intel_device_info_is_9lp(devinfo))) {1952ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||1953brw_inst_no_dd_clear(devinfo, inst),1954"DepCtrl is not allowed when the execution type is 64-bit");1955}19561957return error_msg;1958}19591960static struct string1961instruction_restrictions(const struct intel_device_info *devinfo,1962const brw_inst *inst)1963{1964struct string error_msg = { .str = NULL, .len = 0 };19651966/* From Wa_1604601757:1967*1968* "When multiplying a DW and any lower precision integer, source modifier1969* is not supported."1970*/1971if (devinfo->ver >= 12 &&1972brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) {1973enum brw_reg_type exec_type = execution_type(devinfo, inst);1974const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 
||1975brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||1976!(brw_inst_src0_negate(devinfo, inst) ||1977brw_inst_src0_abs(devinfo, inst));1978const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 ||1979brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE ||1980!(brw_inst_src1_negate(devinfo, inst) ||1981brw_inst_src1_abs(devinfo, inst));19821983ERROR_IF(!brw_reg_type_is_floating_point(exec_type) &&1984type_sz(exec_type) == 4 && !(src0_valid && src1_valid),1985"When multiplying a DW and any lower precision integer, source "1986"modifier is not supported.");1987}19881989if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMP ||1990brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CMPN) {1991if (devinfo->ver <= 7) {1992/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit1993* ISA) says:1994*1995* Accumulator cannot be destination, implicit or explicit. The1996* destination must be a general register or the null register.1997*1998* Page 77 of the Haswell PRM Volume 2b contains the same text. 
The1999* 965G PRMs contain similar text.2000*2001* Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:2002*2003* For the cmp and cmpn instructions, remove the accumulator2004* restrictions.2005*/2006ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&2007brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL,2008"Accumulator cannot be destination, implicit or explicit.");2009}20102011/* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)2012* says:2013*2014* If the destination is the null register, the {Switch} instruction2015* option must be used.2016*2017* Page 77 of the Haswell PRM Volume 2b contains the same text.2018*/2019if (devinfo->ver == 7) {2020ERROR_IF(dst_is_null(devinfo, inst) &&2021brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH,2022"If the destination is the null register, the {Switch} "2023"instruction option must be used.");2024}2025}20262027return error_msg;2028}20292030bool2031brw_validate_instruction(const struct intel_device_info *devinfo,2032const brw_inst *inst, int offset,2033struct disasm_info *disasm)2034{2035struct string error_msg = { .str = NULL, .len = 0 };20362037if (is_unsupported_inst(devinfo, inst)) {2038ERROR("Instruction not supported on this Gen");2039} else {2040CHECK(invalid_values);20412042if (error_msg.str == NULL) {2043CHECK(sources_not_null);2044CHECK(send_restrictions);2045CHECK(alignment_supported);2046CHECK(general_restrictions_based_on_operand_types);2047CHECK(general_restrictions_on_region_parameters);2048CHECK(special_restrictions_for_mixed_float_mode);2049CHECK(region_alignment_rules);2050CHECK(vector_immediate_restrictions);2051CHECK(special_requirements_for_handling_double_precision_data_types);2052CHECK(instruction_restrictions);2053}2054}20552056if (error_msg.str && disasm) {2057disasm_insert_error(disasm, offset, error_msg.str);2058}2059free(error_msg.str);20602061return error_msg.len == 
0;2062}20632064bool2065brw_validate_instructions(const struct intel_device_info *devinfo,2066const void *assembly, int start_offset, int end_offset,2067struct disasm_info *disasm)2068{2069bool valid = true;20702071for (int src_offset = start_offset; src_offset < end_offset;) {2072const brw_inst *inst = assembly + src_offset;2073bool is_compact = brw_inst_cmpt_control(devinfo, inst);2074unsigned inst_size = is_compact ? sizeof(brw_compact_inst)2075: sizeof(brw_inst);2076brw_inst uncompacted;20772078if (is_compact) {2079brw_compact_inst *compacted = (void *)inst;2080brw_uncompact_instruction(devinfo, &uncompacted, compacted);2081inst = &uncompacted;2082}20832084bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm);2085valid = valid && v;20862087src_offset += inst_size;2088}20892090return valid;2091}209220932094