Path: blob/21.2-virgl/src/compiler/nir/nir_constant_expressions.py
4546 views
"""Generate the C source for NIR constant-expression evaluation.

The Mako template below is rendered with the opcode table imported from
nir_opcodes and the resulting C code is printed to standard output.
"""

from __future__ import print_function

import re
from nir_opcodes import opcodes
from nir_opcodes import type_has_size, type_size, type_sizes, type_base_type


def type_add_size(type_, size):
    """Return type_ with size appended unless it already carries a size.

    type_ is returned unchanged when type_has_size() reports that it is
    already sized; otherwise str(size) is appended to it.
    """
    if type_has_size(type_):
        return type_
    return type_ + str(size)


def op_bit_sizes(op):
    """Return the sorted bit sizes shared by all unsized types of op.

    The output type and every input type that has no explicit size each
    contribute the set given by type_sizes(); the result is the
    intersection of those sets as a sorted list.  None is returned when
    the output type and all input types are explicitly sized.
    """
    sizes = None
    if not type_has_size(op.output_type):
        sizes = set(type_sizes(op.output_type))

    for input_type in op.input_types:
        if not type_has_size(input_type):
            if sizes is None:
                sizes = set(type_sizes(input_type))
            else:
                sizes = sizes.intersection(set(type_sizes(input_type)))

    # sorted() accepts any iterable, so no intermediate list is needed.
    return sorted(sizes) if sizes is not None else None


def get_const_field(type_):
    """Return the nir_const_value field name used to access type_.

    Size-1 types use 'b', bool types use 'i<size>', "float16" uses "u16",
    and every other type uses the first letter of its base type followed
    by its size.
    """
    if type_size(type_) == 1:
        return 'b'
    elif type_base_type(type_) == 'bool':
        return 'i' + str(type_size(type_))
    elif type_ == "float16":
        return "u16"
    else:
        return type_base_type(type_)[0] + str(type_size(type_))


# NOTE: this is a regular (non-raw) string literal, so any backslash that
# must reach the generated C file has to be doubled.  The Doxygen "brief"
# tag below is written with two backslashes; with a single one Python would
# turn the backslash-b pair into a backspace character.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "util/double.h"
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "nir_constant_expressions.h"

#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits)))

/**
 * \\brief Checks if the provided value is a denorm and flushes it to zero.
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
    switch(bit_size) {
    case 64:
        if (0 == (value->u64 & 0x7ff0000000000000))
            value->u64 &= 0x8000000000000000;
        break;
    case 32:
        if (0 == (value->u32 & 0x7f800000))
            value->u32 &= 0x80000000;
        break;
    case 16:
        if (0 == (value->u16 & 0x7c00))
            value->u16 &= 0x8000;
    }
}

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packSnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packSnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackSnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
     */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackSnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
     */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packUnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packUnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackUnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm4x8: f / 255.0
     */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackUnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm2x16: f / 65535.0
     */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   if (0 == (u & 0x7c00))
      u &= 0x8000;
   return _mesa_half_to_float(u);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for type in ["float", "int", "uint", "bool"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
   ${type}${width}_t e;
   ${type}${width}_t f;
   ${type}${width}_t g;
   ${type}${width}_t h;
   ${type}${width}_t i;
   ${type}${width}_t j;
   ${type}${width}_t k;
   ${type}${width}_t l;
   ${type}${width}_t m;
   ${type}${width}_t n;
   ${type}${width}_t o;
   ${type}${width}_t p;
};
% endfor
% endfor

<%def name="evaluate_op(op, bit_size, execution_mode)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               ## The loop counter in this scope is _i; keep the index in
               ## sync with the sized branch above.
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor

void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""

from mako.template import Template

# Render the template with the opcode table and the helper functions the
# template calls, and write the generated C source to standard output.
print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                type_base_type=type_base_type,
                                type_size=type_size,
                                type_has_size=type_has_size,
                                type_add_size=type_add_size,
                                op_bit_sizes=op_bit_sizes,
                                get_const_field=get_const_field))