Path: blob/21.2-virgl/src/panfrost/lib/pan_encoder.h
4560 views
/*1* Copyright (C) 2019 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors (Collabora):23* Alyssa Rosenzweig <[email protected]>24*/2526#ifndef __PAN_ENCODER_H27#define __PAN_ENCODER_H2829#include <stdbool.h>30#include "util/format/u_format.h"31#include "pan_bo.h"32#include "midgard_pack.h"3334/* Indices for named (non-XFB) varyings that are present. These are packed35* tightly so they correspond to a bitfield present (P) indexed by (1 <<36* PAN_VARY_*). This has the nice property that you can lookup the buffer index37* of a given special field given a shift S by:38*39* idx = popcount(P & ((1 << S) - 1))40*41* That is... look at all of the varyings that come earlier and count them, the42* count is the new index since plus one. Likewise, the total number of special43* buffers required is simply popcount(P)44*/4546enum pan_special_varying {47PAN_VARY_GENERAL = 0,48PAN_VARY_POSITION = 1,49PAN_VARY_PSIZ = 2,50PAN_VARY_PNTCOORD = 3,51PAN_VARY_FACE = 4,52PAN_VARY_FRAGCOORD = 5,5354/* Keep last */55PAN_VARY_MAX,56};5758/* Tiler structure size computation */5960struct panfrost_device;6162unsigned63panfrost_tiler_get_polygon_list_size(const struct panfrost_device *dev,64unsigned fb_width, unsigned fb_height,65bool has_draws);6667unsigned68panfrost_tiler_header_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);6970unsigned71panfrost_tiler_full_size(unsigned width, unsigned height, unsigned mask, bool hierarchy);7273unsigned74panfrost_choose_hierarchy_mask(75unsigned width, unsigned height,76unsigned vertex_count, bool hierarchy);7778/* Stack sizes */7980unsigned81panfrost_get_stack_shift(unsigned stack_size);8283unsigned84panfrost_get_total_stack_size(85unsigned thread_size,86unsigned threads_per_core,87unsigned core_count);8889const char * panfrost_model_name(unsigned gpu_id);9091/* Attributes / instancing */9293unsigned94panfrost_padded_vertex_count(unsigned vertex_count);9596unsigned97panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags);9899/* Records for gl_VertexID and gl_InstanceID use special encodings on Midgard */100101static inline void102panfrost_vertex_id(unsigned padded_count,103struct mali_attribute_buffer_packed *attr,104bool instanced)105{106pan_pack(attr, ATTRIBUTE_VERTEX_ID, cfg) {107if (instanced) {108cfg.divisor_r = __builtin_ctz(padded_count);109cfg.divisor_p = padded_count >> (cfg.divisor_r + 1);110} else {111/* Large values so the modulo is a no-op */112cfg.divisor_r = 0x1F;113cfg.divisor_p = 0x4;114}115}116}117118static inline void119panfrost_instance_id(unsigned padded_count,120struct mali_attribute_buffer_packed *attr,121bool instanced)122{123pan_pack(attr, ATTRIBUTE_INSTANCE_ID, cfg) {124if (!instanced || padded_count <= 1) {125/* Divide by large number to force to 0 */126cfg.divisor_p = ((1u << 31) - 1);127cfg.divisor_r = 0x1F;128cfg.divisor_e = 0x1;129} else if(util_is_power_of_two_or_zero(padded_count)) {130/* Can't underflow since padded_count >= 2 */131cfg.divisor_r = __builtin_ctz(padded_count) - 1;132} else {133cfg.divisor_p =134panfrost_compute_magic_divisor(padded_count,135&cfg.divisor_r, &cfg.divisor_e);136}137}138}139140/* Sampler comparison functions are flipped in OpenGL from the hardware, so we141* need to be able to flip accordingly */142143static inline enum mali_func144panfrost_flip_compare_func(enum mali_func f)145{146switch (f) {147case MALI_FUNC_LESS: return MALI_FUNC_GREATER;148case MALI_FUNC_GREATER: return MALI_FUNC_LESS;149case MALI_FUNC_LEQUAL: return MALI_FUNC_GEQUAL;150case MALI_FUNC_GEQUAL: return MALI_FUNC_LEQUAL;151default: return f;152}153154}155156/* Compute shaders are invoked with a gl_NumWorkGroups X/Y/Z triplet. Vertex157* shaders are invoked as (1, vertex_count, instance_count). Compute shaders158* also have a gl_WorkGroupSize X/Y/Z triplet. These 6 values are packed159* together in a dynamic bitfield, packed by this routine. */160161static inline void162panfrost_pack_work_groups_compute(163struct mali_invocation_packed *out,164unsigned num_x, unsigned num_y, unsigned num_z,165unsigned size_x, unsigned size_y, unsigned size_z,166bool quirk_graphics, bool indirect_dispatch)167{168/* The values needing packing, in order, and the corresponding shifts.169* Indicies into shift are off-by-one to make the logic easier */170171unsigned values[6] = { size_x, size_y, size_z, num_x, num_y, num_z };172unsigned shifts[7] = { 0 };173uint32_t packed = 0;174175for (unsigned i = 0; i < 6; ++i) {176/* Must be positive, otherwise we underflow */177assert(values[i] >= 1);178179/* OR it in, shifting as required */180packed |= ((values[i] - 1) << shifts[i]);181182/* How many bits did we use? */183unsigned bit_count = util_logbase2_ceil(values[i]);184185/* Set the next shift accordingly */186shifts[i + 1] = shifts[i] + bit_count;187}188189pan_pack(out, INVOCATION, cfg) {190cfg.invocations = packed;191cfg.size_y_shift = shifts[1];192cfg.size_z_shift = shifts[2];193cfg.workgroups_x_shift = shifts[3];194195if (!indirect_dispatch) {196/* Leave zero for the dispatch shader */197cfg.workgroups_y_shift = shifts[4];198cfg.workgroups_z_shift = shifts[5];199}200201/* Quirk: for non-instanced graphics, the blob sets202* workgroups_z_shift = 32. This doesn't appear to matter to203* the hardware, but it's good to be bit-identical. */204205if (quirk_graphics && (num_z <= 1))206cfg.workgroups_z_shift = 32;207208/* For graphics, set to the minimum efficient value. For209* compute, must equal the workgroup X shift for barriers to210* function correctly */211212cfg.thread_group_split = quirk_graphics ?213MALI_SPLIT_MIN_EFFICIENT : cfg.workgroups_x_shift;214}215}216217/* Format conversion */218static inline enum mali_z_internal_format219panfrost_get_z_internal_format(enum pipe_format fmt)220{221switch (fmt) {222case PIPE_FORMAT_Z16_UNORM:223case PIPE_FORMAT_Z16_UNORM_S8_UINT:224return MALI_Z_INTERNAL_FORMAT_D16;225case PIPE_FORMAT_Z24_UNORM_S8_UINT:226case PIPE_FORMAT_Z24X8_UNORM:227return MALI_Z_INTERNAL_FORMAT_D24;228case PIPE_FORMAT_Z32_FLOAT:229case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:230return MALI_Z_INTERNAL_FORMAT_D32;231default:232unreachable("Unsupported depth/stencil format.");233}234}235236#endif237238239