Path: blob/21.2-virgl/src/compiler/nir/nir_gather_xfb_info.c
4545 views
/*1* Copyright © 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "nir_xfb_info.h"2425#include <util/u_math.h>2627static void28add_var_xfb_varying(nir_xfb_info *xfb,29nir_xfb_varyings_info *varyings,30unsigned buffer,31unsigned offset,32const struct glsl_type *type)33{34if (varyings == NULL)35return;3637nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++];3839varying->type = type;40varying->buffer = buffer;41varying->offset = offset;42xfb->buffers[buffer].varying_count++;43}444546static nir_xfb_info *47nir_xfb_info_create(void *mem_ctx, uint16_t output_count)48{49return rzalloc_size(mem_ctx, nir_xfb_info_size(output_count));50}5152static size_t53nir_xfb_varyings_info_size(uint16_t varying_count)54{55return sizeof(nir_xfb_info) + sizeof(nir_xfb_varying_info) * varying_count;56}5758static nir_xfb_varyings_info *59nir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count)60{61return rzalloc_size(mem_ctx, nir_xfb_varyings_info_size(varying_count));62}6364static void65add_var_xfb_outputs(nir_xfb_info *xfb,66nir_xfb_varyings_info *varyings,67nir_variable *var,68unsigned buffer,69unsigned *location,70unsigned *offset,71const struct glsl_type *type,72bool varying_added)73{74/* If this type contains a 64-bit value, align to 8 bytes */75if (glsl_type_contains_64bit(type))76*offset = ALIGN_POT(*offset, 8);7778if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {79unsigned length = glsl_get_length(type);8081const struct glsl_type *child_type = glsl_get_array_element(type);82if (!glsl_type_is_array(child_type) &&83!glsl_type_is_struct(child_type)) {8485add_var_xfb_varying(xfb, varyings, buffer, *offset, type);86varying_added = true;87}8889for (unsigned i = 0; i < length; i++)90add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,91child_type, varying_added);92} else if (glsl_type_is_struct_or_ifc(type)) {93unsigned length = glsl_get_length(type);94for (unsigned i = 0; i < length; i++) {95const struct glsl_type *child_type = glsl_get_struct_field(type, i);96add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,97child_type, varying_added);98}99} else {100assert(buffer < NIR_MAX_XFB_BUFFERS);101if (xfb->buffers_written & (1 << buffer)) {102assert(xfb->buffers[buffer].stride == var->data.xfb.stride);103assert(xfb->buffer_to_stream[buffer] == var->data.stream);104} else {105xfb->buffers_written |= (1 << buffer);106xfb->buffers[buffer].stride = var->data.xfb.stride;107xfb->buffer_to_stream[buffer] = var->data.stream;108}109110assert(var->data.stream < NIR_MAX_XFB_STREAMS);111xfb->streams_written |= (1 << var->data.stream);112113unsigned comp_slots;114if (var->data.compact) {115/* This only happens for clip/cull which are float arrays */116assert(glsl_without_array(type) == glsl_float_type());117assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||118var->data.location == VARYING_SLOT_CLIP_DIST1);119comp_slots = glsl_get_length(type);120} else {121comp_slots = glsl_get_component_slots(type);122123UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);124assert(attrib_slots == glsl_count_attribute_slots(type, false));125126/* Ensure that we don't have, for instance, a dvec2 with a127* location_frac of 2 which would make it crass a location boundary128* even though it fits in a single slot. However, you can have a129* dvec3 which crosses the slot boundary with a location_frac of 2.130*/131assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==132attrib_slots);133}134135assert(var->data.location_frac + comp_slots <= 8);136uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;137unsigned comp_offset = var->data.location_frac;138139if (!varying_added) {140add_var_xfb_varying(xfb, varyings, buffer, *offset, type);141}142143while (comp_mask) {144nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];145146output->buffer = buffer;147output->offset = *offset;148output->location = *location;149output->component_mask = comp_mask & 0xf;150output->component_offset = comp_offset;151152*offset += util_bitcount(output->component_mask) * 4;153(*location)++;154comp_mask >>= 4;155comp_offset = 0;156}157}158}159160static int161compare_xfb_varying_offsets(const void *_a, const void *_b)162{163const nir_xfb_varying_info *a = _a, *b = _b;164165if (a->buffer != b->buffer)166return a->buffer - b->buffer;167168return a->offset - b->offset;169}170171static int172compare_xfb_output_offsets(const void *_a, const void *_b)173{174const nir_xfb_output_info *a = _a, *b = _b;175176return a->offset - b->offset;177}178179nir_xfb_info *180nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)181{182return nir_gather_xfb_info_with_varyings(shader, mem_ctx, NULL);183}184185nir_xfb_info *186nir_gather_xfb_info_with_varyings(const nir_shader *shader,187void *mem_ctx,188nir_xfb_varyings_info **varyings_info_out)189{190assert(shader->info.stage == MESA_SHADER_VERTEX ||191shader->info.stage == MESA_SHADER_TESS_EVAL ||192shader->info.stage == MESA_SHADER_GEOMETRY);193194/* Compute the number of outputs we have. This is simply the number of195* cumulative locations consumed by all the variables. If a location is196* represented by multiple variables, then they each count separately in197* number of outputs. This is only an estimate as some variables may have198* an xfb_buffer but not an output so it may end up larger than we need but199* it should be good enough for allocation.200*/201unsigned num_outputs = 0;202unsigned num_varyings = 0;203nir_xfb_varyings_info *varyings_info = NULL;204nir_foreach_shader_out_variable(var, shader) {205if (var->data.explicit_xfb_buffer) {206num_outputs += glsl_count_attribute_slots(var->type, false);207num_varyings += glsl_varying_count(var->type);208}209}210if (num_outputs == 0 || num_varyings == 0)211return NULL;212213nir_xfb_info *xfb = nir_xfb_info_create(mem_ctx, num_outputs);214if (varyings_info_out != NULL) {215*varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings);216varyings_info = *varyings_info_out;217}218219/* Walk the list of outputs and add them to the array */220nir_foreach_shader_out_variable(var, shader) {221if (!var->data.explicit_xfb_buffer)222continue;223224unsigned location = var->data.location;225226/* In order to know if we have a array of blocks can't be done just by227* checking if we have an interface type and is an array, because due228* splitting we could end on a case were we received a split struct229* that contains an array.230*/231bool is_array_block = var->interface_type != NULL &&232glsl_type_is_array(var->type) &&233glsl_without_array(var->type) == var->interface_type;234235if (var->data.explicit_offset && !is_array_block) {236unsigned offset = var->data.offset;237add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer,238&location, &offset, var->type, false);239} else if (is_array_block) {240assert(glsl_type_is_struct_or_ifc(var->interface_type));241242unsigned aoa_size = glsl_get_aoa_size(var->type);243const struct glsl_type *itype = var->interface_type;244unsigned nfields = glsl_get_length(itype);245for (unsigned b = 0; b < aoa_size; b++) {246for (unsigned f = 0; f < nfields; f++) {247int foffset = glsl_get_struct_field_offset(itype, f);248const struct glsl_type *ftype = glsl_get_struct_field(itype, f);249if (foffset < 0) {250location += glsl_count_attribute_slots(ftype, false);251continue;252}253254unsigned offset = foffset;255add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b,256&location, &offset, ftype, false);257}258}259}260}261262/* Everything is easier in the state setup code if outputs and varyings are263* sorted in order of output offset (and buffer for varyings).264*/265qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),266compare_xfb_output_offsets);267268if (varyings_info != NULL) {269qsort(varyings_info->varyings, varyings_info->varying_count,270sizeof(varyings_info->varyings[0]),271compare_xfb_varying_offsets);272}273274#ifndef NDEBUG275/* Finally, do a sanity check */276unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};277for (unsigned i = 0; i < xfb->output_count; i++) {278assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);279assert(xfb->outputs[i].component_mask != 0);280unsigned slots = util_bitcount(xfb->outputs[i].component_mask);281max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;282}283#endif284285return xfb;286}287288289