/* Path: blob/21.2-virgl/src/intel/common/intel_aux_map.c (4547 views) */
/*1* Copyright (c) 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* The aux map provides a multi-level lookup of the main surface address which25* ends up providing information about the auxiliary surface data, including26* the address where the auxiliary data resides.27*28* The 48-bit VMA (GPU) address of the main surface is split to do the address29* lookup:30*31* 48 bit address of main surface32* +--------+--------+--------+------+33* | 47:36 | 35:24 | 23:16 | 15:0 |34* | L3-idx | L2-idx | L1-idx | ... |35* +--------+--------+--------+------+36*37* The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is38* located by indexing into this buffer as a uint64_t array using the L3-idx39* value. The 64-bit L3 entry is defined as:40*41* +-------+-------------+------+---+42* | 63:48 | 47:15 | 14:1 | 0 |43* | ... | L2-tbl-addr | ... 
| V |44* +-------+-------------+------+---+45*46* If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for47* the level-2 table entries, with the lower address bits filled with zero.48* The L2 Table Entry is located by indexing into this buffer as a uint64_t49* array using the L2-idx value. The 64-bit L2 entry is similar to the L350* entry, except with 2 additional address bits:51*52* +-------+-------------+------+---+53* | 63:48 | 47:13 | 12:1 | 0 |54* | ... | L1-tbl-addr | ... | V |55* +-------+-------------+------+---+56*57* If the `V` bit is set, then the L1-tbl-addr gives the address for the58* level-1 table entries, with the lower address bits filled with zero. The L159* Table Entry is located by indexing into this buffer as a uint64_t array60* using the L1-idx value. The 64-bit L1 entry is defined as:61*62* +--------+------+-------+-------+-------+---------------+-----+---+63* | 63:58 | 57 | 56:54 | 53:52 | 51:48 | 47:8 | 7:1 | 0 |64* | Format | Y/Cr | Depth | TM | ... | aux-data-addr | ... 
| V |65* +--------+------+-------+-------+-------+---------------+-----+---+66*67* Where:68* - Format: See `get_format_encoding`69* - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)70* - (bit) Depth: See `get_bpp_encoding`71* - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd72* - aux-data-addr: VMA/GPU address for the aux-data73* - V: entry is valid74*/7576#include "intel_aux_map.h"77#include "intel_gem.h"7879#include "dev/intel_device_info.h"80#include "isl/isl.h"8182#include "drm-uapi/i915_drm.h"83#include "util/list.h"84#include "util/ralloc.h"85#include "util/u_atomic.h"86#include "main/macros.h"8788#include <inttypes.h>89#include <stdlib.h>90#include <stdio.h>91#include <pthread.h>9293static const bool aux_map_debug = false;9495struct aux_map_buffer {96struct list_head link;97struct intel_buffer *buffer;98};99100struct intel_aux_map_context {101void *driver_ctx;102pthread_mutex_t mutex;103struct intel_mapped_pinned_buffer_alloc *buffer_alloc;104uint32_t num_buffers;105struct list_head buffers;106uint64_t level3_base_addr;107uint64_t *level3_map;108uint32_t tail_offset, tail_remaining;109uint32_t state_num;110};111112static bool113add_buffer(struct intel_aux_map_context *ctx)114{115struct aux_map_buffer *buf = ralloc(ctx, struct aux_map_buffer);116if (!buf)117return false;118119const uint32_t size = 0x100000;120buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);121if (!buf->buffer) {122ralloc_free(buf);123return false;124}125126assert(buf->buffer->map != NULL);127128list_addtail(&buf->link, &ctx->buffers);129ctx->tail_offset = 0;130ctx->tail_remaining = size;131p_atomic_inc(&ctx->num_buffers);132133return true;134}135136static void137advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)138{139assert(ctx->tail_remaining >= size);140ctx->tail_remaining -= size;141ctx->tail_offset += size;142}143144static bool145align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,146uint32_t align)147{148if (ctx->tail_remaining < size)149return 
false;150151struct aux_map_buffer *tail =152list_last_entry(&ctx->buffers, struct aux_map_buffer, link);153uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;154uint64_t aligned = align64(gpu, align);155156if ((aligned - gpu) + size > ctx->tail_remaining) {157return false;158} else {159if (aligned - gpu > 0)160advance_current_pos(ctx, aligned - gpu);161return true;162}163}164165static void166get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)167{168assert(!list_is_empty(&ctx->buffers));169struct aux_map_buffer *tail =170list_last_entry(&ctx->buffers, struct aux_map_buffer, link);171if (gpu)172*gpu = tail->buffer->gpu + ctx->tail_offset;173if (map)174*map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);175}176177static bool178add_sub_table(struct intel_aux_map_context *ctx, uint32_t size,179uint32_t align, uint64_t *gpu, uint64_t **map)180{181if (!align_and_verify_space(ctx, size, align)) {182if (!add_buffer(ctx))183return false;184UNUSED bool aligned = align_and_verify_space(ctx, size, align);185assert(aligned);186}187get_current_pos(ctx, gpu, map);188memset(*map, 0, size);189advance_current_pos(ctx, size);190return true;191}192193uint32_t194intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)195{196return p_atomic_read(&ctx->state_num);197}198199struct intel_aux_map_context *200intel_aux_map_init(void *driver_ctx,201struct intel_mapped_pinned_buffer_alloc *buffer_alloc,202const struct intel_device_info *devinfo)203{204struct intel_aux_map_context *ctx;205if (devinfo->ver < 12)206return NULL;207208ctx = ralloc(NULL, struct intel_aux_map_context);209if (!ctx)210return NULL;211212if (pthread_mutex_init(&ctx->mutex, NULL))213return NULL;214215ctx->driver_ctx = driver_ctx;216ctx->buffer_alloc = buffer_alloc;217ctx->num_buffers = 0;218list_inithead(&ctx->buffers);219ctx->tail_offset = 0;220ctx->tail_remaining = 0;221ctx->state_num = 0;222223if (add_sub_table(ctx, 32 * 1024, 32 * 1024, 
&ctx->level3_base_addr,224&ctx->level3_map)) {225if (aux_map_debug)226fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",227ctx->level3_base_addr, ctx->level3_map);228p_atomic_inc(&ctx->state_num);229return ctx;230} else {231ralloc_free(ctx);232return NULL;233}234}235236void237intel_aux_map_finish(struct intel_aux_map_context *ctx)238{239if (!ctx)240return;241242pthread_mutex_destroy(&ctx->mutex);243list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {244ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);245list_del(&buf->link);246p_atomic_dec(&ctx->num_buffers);247ralloc_free(buf);248}249250ralloc_free(ctx);251}252253uint64_t254intel_aux_map_get_base(struct intel_aux_map_context *ctx)255{256/**257* This get initialized in intel_aux_map_init, and never changes, so there is258* no need to lock the mutex.259*/260return ctx->level3_base_addr;261}262263static struct aux_map_buffer *264find_buffer(struct intel_aux_map_context *ctx, uint64_t addr)265{266list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {267if (buf->buffer->gpu <= addr && buf->buffer->gpu_end > addr) {268return buf;269}270}271return NULL;272}273274static uint64_t *275get_u64_entry_ptr(struct intel_aux_map_context *ctx, uint64_t addr)276{277struct aux_map_buffer *buf = find_buffer(ctx, addr);278assert(buf);279uintptr_t map_offset = addr - buf->buffer->gpu;280return (uint64_t*)((uint8_t*)buf->buffer->map + map_offset);281}282283static uint8_t284get_bpp_encoding(enum isl_format format)285{286if (isl_format_is_yuv(format)) {287switch (format) {288case ISL_FORMAT_YCRCB_NORMAL:289case ISL_FORMAT_YCRCB_SWAPY:290case ISL_FORMAT_PLANAR_420_8: return 3;291case ISL_FORMAT_PLANAR_420_12: return 2;292case ISL_FORMAT_PLANAR_420_10: return 1;293case ISL_FORMAT_PLANAR_420_16: return 0;294default:295unreachable("Unsupported format!");296return 0;297}298} else {299switch (isl_format_get_layout(format)->bpb) {300case 16: return 0;301case 8: return 4;302case 32: return 5;303case 
64: return 6;304case 128: return 7;305default:306unreachable("Unsupported bpp!");307return 0;308}309}310}311312#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT (0x1ull << 52)313314uint64_t315intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,316uint8_t plane)317{318if (aux_map_debug)319fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n",320isl_format_get_name(format),321isl_format_get_aux_map_encoding(format));322323assert(isl_tiling_is_any_y(tiling));324325uint64_t format_bits =326((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |327((uint64_t)(plane > 0) << 57) |328((uint64_t)get_bpp_encoding(format) << 54) |329INTEL_AUX_MAP_ENTRY_Y_TILED_BIT;330331assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);332333return format_bits;334}335336uint64_t337intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)338{339assert(!isl_format_is_planar(isl_surf->format));340return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);341}342343static void344get_aux_entry(struct intel_aux_map_context *ctx, uint64_t address,345uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,346uint64_t **l1_entry_map_out)347{348uint32_t l3_index = (address >> 36) & 0xfff;349uint64_t *l3_entry = &ctx->level3_map[l3_index];350351uint64_t *l2_map;352if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {353uint64_t l2_gpu;354if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &l2_gpu, &l2_map)) {355if (aux_map_debug)356fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",357l3_index, l2_gpu, l2_map);358} else {359unreachable("Failed to add L2 Aux-Map Page Table!");360}361*l3_entry = (l2_gpu & 0xffffffff8000ULL) | 1;362} else {363uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);364l2_map = get_u64_entry_ptr(ctx, l2_addr);365}366uint32_t l2_index = (address >> 24) & 0xfff;367uint64_t *l2_entry = &l2_map[l2_index];368369uint64_t l1_addr, *l1_map;370if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {371if 
(add_sub_table(ctx, 8 * 1024, 8 * 1024, &l1_addr, &l1_map)) {372if (aux_map_debug)373fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",374l2_index, l1_addr, l1_map);375} else {376unreachable("Failed to add L1 Aux-Map Page Table!");377}378*l2_entry = (l1_addr & 0xffffffffe000ULL) | 1;379} else {380l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);381l1_map = get_u64_entry_ptr(ctx, l1_addr);382}383uint32_t l1_index = (address >> 16) & 0xff;384if (l1_index_out)385*l1_index_out = l1_index;386if (l1_entry_addr_out)387*l1_entry_addr_out = l1_addr + l1_index * sizeof(*l1_map);388if (l1_entry_map_out)389*l1_entry_map_out = &l1_map[l1_index];390}391392static void393add_mapping(struct intel_aux_map_context *ctx, uint64_t address,394uint64_t aux_address, uint64_t format_bits,395bool *state_changed)396{397if (aux_map_debug)398fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", address,399aux_address);400401uint32_t l1_index;402uint64_t *l1_entry;403get_aux_entry(ctx, address, &l1_index, NULL, &l1_entry);404405const uint64_t l1_data =406(aux_address & INTEL_AUX_MAP_ADDRESS_MASK) |407format_bits |408INTEL_AUX_MAP_ENTRY_VALID_BIT;409410const uint64_t current_l1_data = *l1_entry;411if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {412assert((aux_address & 0xffULL) == 0);413if (aux_map_debug)414fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",415l1_index, current_l1_data, l1_data);416/**417* We use non-zero bits in 63:1 to indicate the entry had been filled418* previously. 
If these bits are non-zero and they don't exactly match419* what we want to program into the entry, then we must force the420* aux-map tables to be flushed.421*/422if (current_l1_data != 0 && \423(current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)424*state_changed = true;425*l1_entry = l1_data;426} else {427if (aux_map_debug)428fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",429l1_index);430assert(*l1_entry == l1_data);431}432}433434uint64_t *435intel_aux_map_get_entry(struct intel_aux_map_context *ctx,436uint64_t address,437uint64_t *entry_address)438{439pthread_mutex_lock(&ctx->mutex);440uint64_t *l1_entry_map;441get_aux_entry(ctx, address, NULL, entry_address, &l1_entry_map);442pthread_mutex_unlock(&ctx->mutex);443444return l1_entry_map;445}446447void448intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t address,449uint64_t aux_address, uint64_t main_size_B,450uint64_t format_bits)451{452bool state_changed = false;453pthread_mutex_lock(&ctx->mutex);454uint64_t map_addr = address;455uint64_t dest_aux_addr = aux_address;456assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);457assert(align64(aux_address, INTEL_AUX_MAP_AUX_PAGE_SIZE) == aux_address);458while (map_addr - address < main_size_B) {459add_mapping(ctx, map_addr, dest_aux_addr, format_bits, &state_changed);460map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE;461dest_aux_addr += INTEL_AUX_MAP_AUX_PAGE_SIZE;462}463pthread_mutex_unlock(&ctx->mutex);464if (state_changed)465p_atomic_inc(&ctx->state_num);466}467468/**469* We mark the leaf entry as invalid, but we don't attempt to cleanup the470* other levels of translation mappings. 
Since we attempt to re-use VMA471* ranges, hopefully this will not lead to unbounded growth of the translation472* tables.473*/474static void475remove_mapping(struct intel_aux_map_context *ctx, uint64_t address,476bool *state_changed)477{478uint32_t l3_index = (address >> 36) & 0xfff;479uint64_t *l3_entry = &ctx->level3_map[l3_index];480481uint64_t *l2_map;482if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {483return;484} else {485uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);486l2_map = get_u64_entry_ptr(ctx, l2_addr);487}488uint32_t l2_index = (address >> 24) & 0xfff;489uint64_t *l2_entry = &l2_map[l2_index];490491uint64_t *l1_map;492if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {493return;494} else {495uint64_t l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);496l1_map = get_u64_entry_ptr(ctx, l1_addr);497}498uint32_t l1_index = (address >> 16) & 0xff;499uint64_t *l1_entry = &l1_map[l1_index];500501const uint64_t current_l1_data = *l1_entry;502const uint64_t l1_data = current_l1_data & ~1ull;503504if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {505return;506} else {507if (aux_map_debug)508fprintf(stderr, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n",509l3_index, l2_index, l1_index);510/**511* We use non-zero bits in 63:1 to indicate the entry had been filled512* previously. 
In the unlikely event that these are all zero, we force a513* flush of the aux-map tables.514*/515if (unlikely(l1_data == 0))516*state_changed = true;517*l1_entry = l1_data;518}519}520521void522intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t address,523uint64_t size)524{525bool state_changed = false;526pthread_mutex_lock(&ctx->mutex);527if (aux_map_debug)528fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", address,529address + size);530531uint64_t map_addr = address;532assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);533while (map_addr - address < size) {534remove_mapping(ctx, map_addr, &state_changed);535map_addr += 64 * 1024;536}537pthread_mutex_unlock(&ctx->mutex);538if (state_changed)539p_atomic_inc(&ctx->state_num);540}541542uint32_t543intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)544{545return p_atomic_read(&ctx->num_buffers);546}547548void549intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,550uint32_t max_bos)551{552assert(p_atomic_read(&ctx->num_buffers) >= max_bos);553uint32_t i = 0;554list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {555if (i >= max_bos)556return;557driver_bos[i++] = buf->buffer->driver_bo;558}559}560561562