Path: blob/21.2-virgl/src/panfrost/lib/pan_attributes.c
4560 views
/*1* Copyright (C) 2019 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22*/2324#include "util/u_math.h"25#include "pan_encoder.h"2627/* This file handles attribute descriptors. The28* bulk of the complexity is from instancing. See mali_job for29* notes on how this works. But basically, for small vertex30* counts, we have a lookup table, and for large vertex counts,31* we look at the high bits as a heuristic. This has to match32* exactly how the hardware calculates this (which is why the33* algorithm is so weird) or else instancing will break. */3435/* Given an odd number (of the form 2k + 1), compute k */36#define ODD(odd) ((odd - 1) >> 1)3738static unsigned39panfrost_small_padded_vertex_count(unsigned idx)40{41if (idx < 10)42return idx;43else44return (idx + 1) & ~1;45}4647static unsigned48panfrost_large_padded_vertex_count(uint32_t vertex_count)49{50/* First, we have to find the highest set one */51unsigned highest = 32 - __builtin_clz(vertex_count);5253/* Using that, we mask out the highest 4-bits */54unsigned n = highest - 4;55unsigned nibble = (vertex_count >> n) & 0xF;5657/* Great, we have the nibble. Now we can just try possibilities. Note58* that we don't care about the bottom most bit in most cases, and we59* know the top bit must be 1 */6061unsigned middle_two = (nibble >> 1) & 0x3;6263switch (middle_two) {64case 0b00:65if (!(nibble & 1))66return (1 << n) * 9;67else68return (1 << (n + 1)) * 5;69case 0b01:70return (1 << (n + 2)) * 3;71case 0b10:72return (1 << (n + 1)) * 7;73case 0b11:74return (1 << (n + 4));75default:76return 0; /* unreachable */77}78}7980unsigned81panfrost_padded_vertex_count(unsigned vertex_count)82{83if (vertex_count < 20)84return panfrost_small_padded_vertex_count(vertex_count);85else86return panfrost_large_padded_vertex_count(vertex_count);87}8889/* The much, much more irritating case -- instancing is enabled. See90* panfrost_job.h for notes on how this works */9192unsigned93panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)94{95/* We have a NPOT divisor. Here's the fun one (multipling by96* the inverse and shifting) */9798/* floor(log2(d)) */99unsigned shift = util_logbase2(hw_divisor);100101/* m = ceil(2^(32 + shift) / d) */102uint64_t shift_hi = 32 + shift;103uint64_t t = 1ll << shift_hi;104double t_f = t;105double hw_divisor_d = hw_divisor;106double m_f = ceil(t_f / hw_divisor_d);107unsigned m = m_f;108109/* Default case */110uint32_t magic_divisor = m;111112/* e = 2^(shift + 32) % d */113uint64_t e = t % hw_divisor;114115/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob116* seems to use a different condition */117if (e <= (1ll << shift)) {118magic_divisor = m - 1;119*extra_flags = 1;120}121122/* Top flag implicitly set */123assert(magic_divisor & (1u << 31));124magic_divisor &= ~(1u << 31);125*o_shift = shift;126127return magic_divisor;128}129130131