Path: blob/21.2-virgl/src/freedreno/vulkan/tu_cs.h
4565 views
/*1* Copyright © 2019 Google LLC2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*/22#ifndef TU_CS_H23#define TU_CS_H2425#include "tu_private.h"2627#include "adreno_pm4.xml.h"2829#include "freedreno_pm4.h"3031void32tu_cs_init(struct tu_cs *cs,33struct tu_device *device,34enum tu_cs_mode mode,35uint32_t initial_size);3637void38tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end);3940void41tu_cs_finish(struct tu_cs *cs);4243void44tu_cs_begin(struct tu_cs *cs);4546void47tu_cs_end(struct tu_cs *cs);4849VkResult50tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);5152VkResult53tu_cs_alloc(struct tu_cs *cs,54uint32_t count,55uint32_t size,56struct tu_cs_memory *memory);5758struct tu_cs_entry59tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);6061static inline struct tu_draw_state62tu_cs_end_draw_state(struct tu_cs *cs, struct tu_cs *sub_cs)63{64struct tu_cs_entry entry = tu_cs_end_sub_stream(cs, sub_cs);65return (struct tu_draw_state) {66.iova = entry.bo->iova + entry.offset,67.size = entry.size / sizeof(uint32_t),68};69}7071VkResult72tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);7374static inline struct tu_draw_state75tu_cs_draw_state(struct tu_cs *sub_cs, struct tu_cs *cs, uint32_t size)76{77struct tu_cs_memory memory;7879/* TODO: clean this up */80tu_cs_alloc(sub_cs, size, 1, &memory);81tu_cs_init_external(cs, memory.map, memory.map + size);82tu_cs_begin(cs);83tu_cs_reserve_space(cs, size);8485return (struct tu_draw_state) {86.iova = memory.iova,87.size = size,88};89}9091void92tu_cs_reset(struct tu_cs *cs);9394VkResult95tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);9697/**98* Get the size of the command packets emitted since the last call to99* tu_cs_add_entry.100*/101static inline uint32_t102tu_cs_get_size(const struct tu_cs *cs)103{104return cs->cur - cs->start;105}106107/**108* Return true if there is no command packet emitted since the last call to109* tu_cs_add_entry.110*/111static inline uint32_t112tu_cs_is_empty(const struct tu_cs *cs)113{114return tu_cs_get_size(cs) == 0;115}116117/**118* Discard all entries. This allows \a cs to be reused while keeping the119* existing BOs and command packets intact.120*/121static inline void122tu_cs_discard_entries(struct tu_cs *cs)123{124assert(cs->mode == TU_CS_MODE_GROW);125cs->entry_count = 0;126}127128/**129* Get the size needed for tu_cs_emit_call.130*/131static inline uint32_t132tu_cs_get_call_size(const struct tu_cs *cs)133{134assert(cs->mode == TU_CS_MODE_GROW);135/* each CP_INDIRECT_BUFFER needs 4 dwords */136return cs->entry_count * 4;137}138139/**140* Assert that we did not exceed the reserved space.141*/142static inline void143tu_cs_sanity_check(const struct tu_cs *cs)144{145assert(cs->start <= cs->cur);146assert(cs->cur <= cs->reserved_end);147assert(cs->reserved_end <= cs->end);148}149150/**151* Emit a uint32_t value into a command stream, without boundary checking.152*/153static inline void154tu_cs_emit(struct tu_cs *cs, uint32_t value)155{156assert(cs->cur < cs->reserved_end);157*cs->cur = value;158++cs->cur;159}160161/**162* Emit an array of uint32_t into a command stream, without boundary checking.163*/164static inline void165tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)166{167assert(cs->cur + length <= cs->reserved_end);168memcpy(cs->cur, values, sizeof(uint32_t) * length);169cs->cur += length;170}171172/**173* Get the size of the remaining space in the current BO.174*/175static inline uint32_t176tu_cs_get_space(const struct tu_cs *cs)177{178return cs->end - cs->cur;179}180181static inline void182tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)183{184if (cs->mode != TU_CS_MODE_GROW) {185assert(tu_cs_get_space(cs) >= reserved_size);186assert(cs->reserved_end == cs->end);187return;188}189190if (tu_cs_get_space(cs) >= reserved_size &&191cs->entry_count < cs->entry_capacity) {192cs->reserved_end = cs->cur + reserved_size;193return;194}195196ASSERTED VkResult result = tu_cs_reserve_space(cs, reserved_size);197/* TODO: set this error in tu_cs and use it */198assert(result == VK_SUCCESS);199}200201/**202* Emit a type-4 command packet header into a command stream.203*/204static inline void205tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)206{207tu_cs_reserve(cs, cnt + 1);208tu_cs_emit(cs, pm4_pkt4_hdr(regindx, cnt));209}210211/**212* Emit a type-7 command packet header into a command stream.213*/214static inline void215tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)216{217tu_cs_reserve(cs, cnt + 1);218tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt));219}220221static inline void222tu_cs_emit_wfi(struct tu_cs *cs)223{224tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);225}226227static inline void228tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)229{230tu_cs_emit(cs, (uint32_t) value);231tu_cs_emit(cs, (uint32_t) (value >> 32));232}233234static inline void235tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)236{237tu_cs_emit_pkt4(cs, reg, 1);238tu_cs_emit(cs, value);239}240241/**242* Emit a CP_INDIRECT_BUFFER command packet.243*/244static inline void245tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)246{247assert(entry->bo);248assert(entry->size && entry->offset + entry->size <= entry->bo->size);249assert(entry->size % sizeof(uint32_t) == 0);250assert(entry->offset % sizeof(uint32_t) == 0);251252tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);253tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);254tu_cs_emit(cs, entry->size / sizeof(uint32_t));255}256257/* for compute which isn't using SET_DRAW_STATE */258static inline void259tu_cs_emit_state_ib(struct tu_cs *cs, struct tu_draw_state state)260{261if (state.size) {262tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);263tu_cs_emit_qw(cs, state.iova);264tu_cs_emit(cs, state.size);265}266}267268/**269* Emit a CP_INDIRECT_BUFFER command packet for each entry in the target270* command stream.271*/272static inline void273tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)274{275assert(target->mode == TU_CS_MODE_GROW);276for (uint32_t i = 0; i < target->entry_count; i++)277tu_cs_emit_ib(cs, target->entries + i);278}279280/* Helpers for bracketing a large sequence of commands of unknown size inside281* a CP_COND_REG_EXEC packet.282*/283static inline void284tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)285{286assert(cs->mode == TU_CS_MODE_GROW);287assert(!cs->cond_flags && cond_flags);288289tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);290tu_cs_emit(cs, cond_flags);291292cs->cond_flags = cond_flags;293cs->cond_dwords = cs->cur;294295/* Emit dummy DWORD field here */296tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));297}298#define CP_COND_EXEC_0_RENDER_MODE_GMEM \299(CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)300#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM \301(CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)302303static inline void304tu_cond_exec_end(struct tu_cs *cs)305{306assert(cs->cond_flags);307308cs->cond_flags = 0;309/* Subtract one here to account for the DWORD field itself. */310*cs->cond_dwords = cs->cur - cs->cond_dwords - 1;311}312313#define fd_reg_pair tu_reg_value314#define __bo_type struct tu_bo *315316#include "a6xx.xml.h"317#include "a6xx-pack.xml.h"318319#define __assert_eq(a, b) \320do { \321if ((a) != (b)) { \322fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \323assert((a) == (b)); \324} \325} while (0)326327#define __ONE_REG(i, regs) \328do { \329if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \330__assert_eq(regs[0].reg + i, regs[i].reg); \331if (regs[i].bo) { \332uint64_t v = regs[i].bo->iova + regs[i].bo_offset; \333v >>= regs[i].bo_shift; \334v |= regs[i].value; \335\336*p++ = v; \337*p++ = v >> 32; \338} else { \339*p++ = regs[i].value; \340if (regs[i].is_address) \341*p++ = regs[i].value >> 32; \342} \343} \344} while (0)345346/* Emits a sequence of register writes in order using a pkt4. This will check347* (at runtime on a !NDEBUG build) that the registers were actually set up in348* order in the code.349*350* Note that references to buffers aren't automatically added to the CS,351* unlike in freedreno. We are clever in various places to avoid duplicating352* the reference add work.353*354* Also, 64-bit address registers don't have a way (currently) to set a 64-bit355* address without having a reference to a BO, since the .dword field in the356* register's struct is only 32-bit wide. We should fix this in the pack357* codegen later.358*/359#define tu_cs_emit_regs(cs, ...) do { \360const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \361unsigned count = ARRAY_SIZE(regs); \362\363STATIC_ASSERT(count > 0); \364STATIC_ASSERT(count <= 16); \365\366tu_cs_emit_pkt4((cs), regs[0].reg, count); \367uint32_t *p = (cs)->cur; \368__ONE_REG( 0, regs); \369__ONE_REG( 1, regs); \370__ONE_REG( 2, regs); \371__ONE_REG( 3, regs); \372__ONE_REG( 4, regs); \373__ONE_REG( 5, regs); \374__ONE_REG( 6, regs); \375__ONE_REG( 7, regs); \376__ONE_REG( 8, regs); \377__ONE_REG( 9, regs); \378__ONE_REG(10, regs); \379__ONE_REG(11, regs); \380__ONE_REG(12, regs); \381__ONE_REG(13, regs); \382__ONE_REG(14, regs); \383__ONE_REG(15, regs); \384(cs)->cur = p; \385} while (0)386387#endif /* TU_CS_H */388389390