Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_util.h
4570 views
/*1* Copyright (C) 2012 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#ifndef FREEDRENO_UTIL_H_27#define FREEDRENO_UTIL_H_2829#include "drm/freedreno_drmif.h"30#include "drm/freedreno_ringbuffer.h"3132#include "pipe/p_format.h"33#include "pipe/p_state.h"34#include "util/compiler.h"35#include "util/half_float.h"36#include "util/log.h"37#include "util/u_debug.h"38#include "util/u_dynarray.h"39#include "util/u_math.h"40#include "util/u_pack_color.h"4142#include "adreno_common.xml.h"43#include "adreno_pm4.xml.h"44#include "disasm.h"4546#ifdef __cplusplus47extern "C" {48#endif4950enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);51enum pc_di_index_size fd_pipe2index(enum pipe_format format);52enum pipe_format fd_gmem_restore_format(enum pipe_format format);53enum adreno_rb_blend_factor fd_blend_factor(unsigned factor);54enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);55enum adreno_stencil_op fd_stencil_op(unsigned op);5657#define A3XX_MAX_MIP_LEVELS 145859#define A2XX_MAX_RENDER_TARGETS 160#define A3XX_MAX_RENDER_TARGETS 461#define A4XX_MAX_RENDER_TARGETS 862#define A5XX_MAX_RENDER_TARGETS 863#define A6XX_MAX_RENDER_TARGETS 86465#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS6667/* clang-format off */68enum fd_debug_flag {69FD_DBG_MSGS = BITFIELD_BIT(0),70FD_DBG_DISASM = BITFIELD_BIT(1),71FD_DBG_DCLEAR = BITFIELD_BIT(2),72FD_DBG_DDRAW = BITFIELD_BIT(3),73FD_DBG_NOSCIS = BITFIELD_BIT(4),74FD_DBG_DIRECT = BITFIELD_BIT(5),75FD_DBG_NOBYPASS = BITFIELD_BIT(6),76FD_DBG_PERF = BITFIELD_BIT(7),77FD_DBG_NOBIN = BITFIELD_BIT(8),78FD_DBG_NOGMEM = BITFIELD_BIT(9),79FD_DBG_SERIALC = BITFIELD_BIT(10),80FD_DBG_SHADERDB = BITFIELD_BIT(11),81FD_DBG_FLUSH = BITFIELD_BIT(12),82FD_DBG_DEQP = BITFIELD_BIT(13),83FD_DBG_INORDER = BITFIELD_BIT(14),84FD_DBG_BSTAT = BITFIELD_BIT(15),85FD_DBG_NOGROW = BITFIELD_BIT(16),86FD_DBG_LRZ = BITFIELD_BIT(17),87FD_DBG_NOINDR = BITFIELD_BIT(18),88FD_DBG_NOBLIT = BITFIELD_BIT(19),89FD_DBG_HIPRIO = BITFIELD_BIT(20),90FD_DBG_TTILE = BITFIELD_BIT(21),91FD_DBG_PERFC = BITFIELD_BIT(22),92FD_DBG_NOUBWC = BITFIELD_BIT(23),93FD_DBG_NOLRZ = BITFIELD_BIT(24),94FD_DBG_NOTILE = BITFIELD_BIT(25),95FD_DBG_LAYOUT = BITFIELD_BIT(26),96FD_DBG_NOFP16 = BITFIELD_BIT(27),97FD_DBG_NOHW = BITFIELD_BIT(28),98};99/* clang-format on */100101extern int fd_mesa_debug;102extern bool fd_binning_enabled;103104#define FD_DBG(category) unlikely(fd_mesa_debug &FD_DBG_##category)105106#include <unistd.h>107#include <sys/types.h>108109#define DBG(fmt, ...) \110do { \111if (FD_DBG(MSGS)) \112mesa_logi("%5d: %s:%d: " fmt, gettid(), __FUNCTION__, __LINE__, \113##__VA_ARGS__); \114} while (0)115116#define perf_debug_ctx(ctx, ...) \117do { \118if (FD_DBG(PERF)) \119mesa_logw(__VA_ARGS__); \120struct fd_context *__c = (ctx); \121if (__c) \122pipe_debug_message(&__c->debug, PERF_INFO, __VA_ARGS__); \123} while (0)124125#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__)126127#define perf_time_ctx(ctx, limit_ns, fmt, ...) \128for (struct __perf_time_state __s = \129{ \130.t = -__perf_get_time(ctx), \131}; \132!__s.done; ({ \133__s.t += __perf_get_time(ctx); \134__s.done = true; \135if (__s.t > (limit_ns)) { \136perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__, \137(double)__s.t / 1000000.0); \138} \139}))140141#define perf_time(limit_ns, fmt, ...) \142perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__)143144struct __perf_time_state {145int64_t t;146bool done;147};148149/* static inline would be nice here, except 'struct fd_context' is not150* defined yet:151*/152#define __perf_get_time(ctx) \153((FD_DBG(PERF) || ({ \154struct fd_context *__c = (ctx); \155unlikely(__c && __c->debug.debug_message); \156})) \157? os_time_get_nano() \158: 0)159160struct fd_context;161162/**163* A psuedo-variable for defining where various parts of the fd_context164* can be safely accessed.165*166* With threaded_context, certain pctx funcs are called from gallium167* front-end/state-tracker (eg. CSO creation), while others are called168* from the driver thread. Things called from driver thread can safely169* access anything in the ctx, while things called from the fe/st thread170* must limit themselves to "safe" things (ie. ctx->screen is safe as it171* is immutable, but the blitter_context is not).172*/173extern lock_cap_t fd_context_access_cap;174175/**176* Make the annotation a bit less verbose.. mark fields which should only177* be accessed by driver-thread with 'dt'178*/179#define dt guarded_by(fd_context_access_cap)180181/**182* Annotation for entry-point functions only called in driver thread.183*184* For static functions, apply the annotation to the function declaration.185* Otherwise apply to the function prototype.186*/187#define in_dt assert_cap(fd_context_access_cap)188189/**190* Annotation for internal functions which are only called from entry-191* point functions (with 'in_dt' annotation) or other internal functions192* with the 'assert_dt' annotation.193*194* For static functions, apply the annotation to the function declaration.195* Otherwise apply to the function prototype.196*/197#define assert_dt requires_cap(fd_context_access_cap)198199/**200* Special helpers for context access outside of driver thread. For ex,201* pctx->get_query_result() is not called on driver thread, but the202* query is guaranteed to be flushed, or the driver thread queue is203* guaranteed to be flushed.204*205* Use with caution!206*/207static inline void208fd_context_access_begin(struct fd_context *ctx)209acquire_cap(fd_context_access_cap)210{211}212213static inline void214fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap)215{216}217218/* for conditionally setting boolean flag(s): */219#define COND(bool, val) ((bool) ? (val) : 0)220221#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000))))222223static inline uint32_t224DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,225enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode,226uint8_t instances)227{228return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) |229((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) |230(instances << 24);231}232233static inline uint32_t234DRAW_A20X(enum pc_di_primtype prim_type,235enum pc_di_face_cull_sel faceness_cull_select,236enum pc_di_src_sel source_select, enum pc_di_index_size index_size,237bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count)238{239return (prim_type << 0) | (source_select << 6) |240(faceness_cull_select << 8) | ((index_size & 1) << 11) |241((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) |242(grp_cull_enable << 15) | (count << 16);243}244245/* for tracking cmdstream positions that need to be patched: */246struct fd_cs_patch {247uint32_t *cs;248uint32_t val;249};250#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))251#define fd_patch_element(buf, i) \252util_dynarray_element(buf, struct fd_cs_patch, i)253254static inline enum pipe_format255pipe_surface_format(struct pipe_surface *psurf)256{257if (!psurf)258return PIPE_FORMAT_NONE;259return psurf->format;260}261262static inline bool263fd_surface_half_precision(const struct pipe_surface *psurf)264{265enum pipe_format format;266267if (!psurf)268return true;269270format = psurf->format;271272/* colors are provided in consts, which go through cov.f32f16, which will273* break these values274*/275if (util_format_is_pure_integer(format))276return false;277278/* avoid losing precision on 32-bit float formats */279if (util_format_is_float(format) &&280util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) ==28132)282return false;283284return true;285}286287static inline unsigned288fd_sampler_first_level(const struct pipe_sampler_view *view)289{290if (view->target == PIPE_BUFFER)291return 0;292return view->u.tex.first_level;293}294295static inline unsigned296fd_sampler_last_level(const struct pipe_sampler_view *view)297{298if (view->target == PIPE_BUFFER)299return 0;300return view->u.tex.last_level;301}302303static inline bool304fd_half_precision(struct pipe_framebuffer_state *pfb)305{306unsigned i;307308for (i = 0; i < pfb->nr_cbufs; i++)309if (!fd_surface_half_precision(pfb->cbufs[i]))310return false;311312return true;313}314315static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);316317/* like OUT_RING() but appends a cmdstream patch point to 'buf' */318static inline void319OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf)320{321if (LOG_DWORDS) {322DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,323(uint32_t)(ring->cur - ring->start), data);324}325util_dynarray_append(buf, struct fd_cs_patch,326((struct fd_cs_patch){327.cs = ring->cur++,328.val = data,329}));330}331332static inline void333__OUT_IB(struct fd_ringbuffer *ring, bool prefetch,334struct fd_ringbuffer *target)335{336if (target->cur == target->start)337return;338339unsigned count = fd_ringbuffer_cmd_count(target);340341/* for debug after a lock up, write a unique counter value342* to scratch6 for each IB, to make it easier to match up343* register dumps to cmdstream. The combination of IB and344* DRAW (scratch7) is enough to "triangulate" the particular345* draw that caused lockup.346*/347emit_marker(ring, 6);348349for (unsigned i = 0; i < count; i++) {350uint32_t dwords;351OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD,3522);353dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;354assert(dwords > 0);355OUT_RING(ring, dwords);356OUT_PKT2(ring);357}358359emit_marker(ring, 6);360}361362static inline void363__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)364{365if (target->cur == target->start)366return;367368unsigned count = fd_ringbuffer_cmd_count(target);369370for (unsigned i = 0; i < count; i++) {371uint32_t dwords;372OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);373dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;374assert(dwords > 0);375OUT_RING(ring, dwords);376}377}378379/* CP_SCRATCH_REG4 is used to hold base address for query results: */380// XXX annoyingly scratch regs move on a5xx.. and additionally different381// packet types.. so freedreno_query_hw is going to need a bit of382// rework..383#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4384385#ifdef DEBUG386#define __EMIT_MARKER 1387#else388#define __EMIT_MARKER 0389#endif390391static inline void392emit_marker(struct fd_ringbuffer *ring, int scratch_idx)393{394extern int32_t marker_cnt;395unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;396assert(reg != HW_QUERY_BASE_REG);397if (reg == HW_QUERY_BASE_REG)398return;399if (__EMIT_MARKER) {400OUT_WFI5(ring);401OUT_PKT0(ring, reg, 1);402OUT_RING(ring, p_atomic_inc_return(&marker_cnt));403}404}405406static inline uint32_t407pack_rgba(enum pipe_format format, const float *rgba)408{409union util_color uc;410util_pack_color(rgba, format, &uc);411return uc.ui[0];412}413414/*415* swap - swap value of @a and @b416*/417#define swap(a, b) \418do { \419__typeof(a) __tmp = (a); \420(a) = (b); \421(b) = __tmp; \422} while (0)423424#define BIT(bit) (1u << bit)425426/*427* a3xx+ helpers:428*/429430static inline enum a3xx_msaa_samples431fd_msaa_samples(unsigned samples)432{433switch (samples) {434default:435debug_assert(0);436#if defined(NDEBUG) || defined(DEBUG)437FALLTHROUGH;438#endif439case 0:440case 1:441return MSAA_ONE;442case 2:443return MSAA_TWO;444case 4:445return MSAA_FOUR;446case 8:447return MSAA_EIGHT;448}449}450451/*452* a4xx+ helpers:453*/454455static inline enum a4xx_state_block456fd4_stage2shadersb(gl_shader_stage type)457{458switch (type) {459case MESA_SHADER_VERTEX:460return SB4_VS_SHADER;461case MESA_SHADER_FRAGMENT:462return SB4_FS_SHADER;463case MESA_SHADER_COMPUTE:464case MESA_SHADER_KERNEL:465return SB4_CS_SHADER;466default:467unreachable("bad shader type");468return (enum a4xx_state_block) ~0;469}470}471472static inline enum a4xx_index_size473fd4_size2indextype(unsigned index_size)474{475switch (index_size) {476case 1:477return INDEX4_SIZE_8_BIT;478case 2:479return INDEX4_SIZE_16_BIT;480case 4:481return INDEX4_SIZE_32_BIT;482}483DBG("unsupported index size: %d", index_size);484assert(0);485return INDEX4_SIZE_32_BIT;486}487488#ifdef __cplusplus489}490#endif491492#endif /* FREEDRENO_UTIL_H_ */493494495